dramaling-vocab-learning/backend/DramaLing.Api/Services/Speech/AzurePronunciationAssessmen...

1021 lines
42 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
using Microsoft.Extensions.Options;
using System.Diagnostics;
using DramaLing.Api.Models.Configuration;
using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Contracts.Services.Speech;
namespace DramaLing.Api.Services.Speech;
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
{
private readonly AzureSpeechOptions _options;
private readonly ILogger<AzurePronunciationAssessmentService> _logger;
/// <summary>
/// Creates the service and logs a non-sensitive summary of the Azure Speech configuration.
/// </summary>
/// <param name="options">Bound Azure Speech options; the wrapped value is stored for later use.</param>
/// <param name="logger">Logger used for every diagnostic emitted by this service.</param>
public AzurePronunciationAssessmentService(
    IOptions<AzureSpeechOptions> options,
    ILogger<AzurePronunciationAssessmentService> logger)
{
    _options = options.Value;
    _logger = logger;

    // Debug aid: summarize the loaded configuration.
    // Only the key LENGTH is logged; fragments of the key are secret material and must never reach the logs.
    var keyLength = string.IsNullOrEmpty(_options.SubscriptionKey) ? 0 : _options.SubscriptionKey.Length;
    _logger.LogInformation("🔍 Azure Speech Services 配置載入詳情:");
    _logger.LogInformation(" Region: {Region}", _options.Region);
    _logger.LogInformation(" KeyLength: {KeyLength}", keyLength);
    _logger.LogInformation(" EnableDetailedResult: {EnableDetailedResult}", _options.EnableDetailedResult);
    _logger.LogInformation(" TimeoutSeconds: {TimeoutSeconds}", _options.TimeoutSeconds);

    // Report whether the same setting is also present as an environment variable,
    // again without echoing any part of its value.
    var envKey = Environment.GetEnvironmentVariable("AzureSpeech__SubscriptionKey");
    if (!string.IsNullOrEmpty(envKey))
    {
        _logger.LogWarning("⚠️ 發現環境變數 AzureSpeech__SubscriptionKey (長度: {EnvKeyLength})", envKey.Length);
    }

    if (string.IsNullOrEmpty(_options.SubscriptionKey))
    {
        _logger.LogError("⚠️ Azure Speech Services SubscriptionKey 為空!請檢查 User Secrets 配置");
    }
    else if (!_options.SubscriptionKey.StartsWith("AKV", StringComparison.Ordinal))
    {
        // Ordinal comparison: the key is an opaque identifier, not linguistic text.
        _logger.LogWarning("⚠️ SubscriptionKey 格式看起來不正確,期望以 'AKV' 開頭");
    }
}
/// <summary>
/// Runs an Azure pronunciation assessment of <paramref name="audioStream"/> against
/// <paramref name="referenceText"/> and maps the outcome to a <see cref="PronunciationResult"/>.
/// </summary>
/// <remarks>
/// The audio bytes are pushed to the recognizer through a stream declared as 16 kHz, 16-bit, mono PCM.
/// Every failure surfaces as an <see cref="InvalidOperationException"/>. Failures that were already
/// diagnosed (configuration, audio validation, NoMatch, cancellation) keep their specific message instead
/// of being replaced by a generic one, and wrapped exceptions keep their original as InnerException.
/// NOTE(review): the method never calls anything that enables prosody assessment on the config —
/// confirm that ProsodyScore is actually populated by the service.
/// NOTE(review): Overall is filled from AccuracyScore — confirm this is intended.
/// NOTE(review): _options.TimeoutSeconds is not applied to the recognition call in this method.
/// </remarks>
/// <param name="audioStream">Recorded audio; rewound to the start when seekable, then read to the end.</param>
/// <param name="referenceText">Text the speaker was expected to say.</param>
/// <param name="flashcardId">Identifier copied into the result and the logs.</param>
/// <param name="language">Recognition language passed to the speech configuration.</param>
/// <returns>The scores, word-level results and feedback for the recording.</returns>
/// <exception cref="InvalidOperationException">Configuration, audio or recognition failed.</exception>
public async Task<PronunciationResult> EvaluatePronunciationAsync(
    Stream audioStream,
    string referenceText,
    string flashcardId,
    string language = "en-US")
{
    var stopwatch = Stopwatch.StartNew();
    // Declared outside the try so the finally block can release the stream on every path.
    AudioInputStream? audioInputStream = null;
    // Kept at method scope so failure paths can report how much audio was read.
    var audioData = new List<byte>();
    try
    {
        _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}", flashcardId, language);

        // 1. Validate the Azure configuration.
        var configValidation = ValidateAzureConfiguration();
        if (!configValidation.IsValid)
        {
            var joinedErrors = string.Join(", ", configValidation.Errors);
            _logger.LogError("❌ Azure Speech Services 配置驗證失敗: {Errors}", joinedErrors);
            throw new DiagnosedAssessmentException($"Azure 配置錯誤: {joinedErrors}");
        }
        _logger.LogInformation("✅ Azure Speech Services 配置驗證通過");

        // 2. Build the speech configuration.
        var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
        speechConfig.SpeechRecognitionLanguage = language;

        // 3. Build the pronunciation assessment parameters.
        var pronunciationConfig = new PronunciationAssessmentConfig(
            referenceText,
            GradingSystem.HundredMark,
            Granularity.Word,
            enableMiscue: true);

        // 4. Read, validate and push the audio.
        var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
        try
        {
            audioInputStream = AudioInputStream.CreatePushStream(audioFormat);
            _logger.LogDebug("✅ AudioInputStream 已建立");

            // Rewind the caller's stream when possible.
            if (audioStream.CanSeek)
            {
                audioStream.Position = 0;
                _logger.LogDebug("音頻流大小: {Size} bytes", audioStream.Length);
            }

            // Read everything, appending whole chunks rather than single bytes.
            var buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = await audioStream.ReadAsync(buffer.AsMemory(0, buffer.Length))) > 0)
            {
                audioData.AddRange(new ArraySegment<byte>(buffer, 0, bytesRead));
            }
            _logger.LogInformation("成功讀取音頻數據: {Size} bytes", audioData.Count);

            if (audioData.Count == 0)
            {
                throw new DiagnosedAssessmentException("音頻數據為空,請重新錄製音頻並確保麥克風正常工作");
            }

            // Extended validation; critical errors abort, warnings only trigger recovery below.
            var validationResult = ValidateAudioData(audioData);
            if (!validationResult.IsValid)
            {
                _logger.LogWarning("⚠️ 音頻數據驗證警告: {Warnings}", string.Join(", ", validationResult.Warnings));
                if (validationResult.HasCriticalErrors)
                {
                    throw new DiagnosedAssessmentException($"音頻數據驗證失敗: {string.Join(", ", validationResult.Errors)}");
                }
            }

            // Less than 1 KB is unlikely to be usable audio.
            if (audioData.Count < 1000)
            {
                _logger.LogWarning("音頻數據過小,可能無效: {Size} bytes", audioData.Count);
            }

            LogAudioDiagnostics(audioData);

            // Try to repair audio that validated with warnings only.
            var processedAudioData = audioData;
            if (!validationResult.IsValid && !validationResult.HasCriticalErrors)
            {
                _logger.LogInformation("🔧 嘗試音頻數據恢復策略...");
                processedAudioData = AttemptAudioRecovery(audioData);
                if (processedAudioData.Count != audioData.Count)
                {
                    _logger.LogInformation("✅ 音頻數據已通過恢復策略處理: {OriginalSize} -> {ProcessedSize} bytes",
                        audioData.Count, processedAudioData.Count);
                }
            }

            // Push the bytes into the Azure stream and close it to signal end of input.
            var audioBytes = processedAudioData.ToArray();
            if (audioInputStream is PushAudioInputStream pushStream)
            {
                pushStream.Write(audioBytes, audioBytes.Length);
                pushStream.Close();
            }
            else
            {
                throw new DiagnosedAssessmentException("AudioInputStream 類型不支援直接寫入");
            }
            _logger.LogInformation("音頻數據已傳送到 Azure Speech Services");
        }
        catch (Exception ex) when (ex is not DiagnosedAssessmentException)
        {
            // Diagnosed failures are excluded by the filter so their specific message is kept.
            _logger.LogError(ex, "❌ 音頻流處理失敗: ExceptionType={Type}, Message={Message}",
                ex.GetType().Name, ex.Message);
            _logger.LogError("🔍 音頻處理失敗時的狀態:");
            _logger.LogError(" - 音頻數據大小: {Size} bytes", audioData.Count);
            _logger.LogError(" - AudioInputStream 狀態: {Status}", audioInputStream != null ? "已建立" : "未建立");

            SafeCleanupResources(audioInputStream, "音頻流處理失敗");
            // Already released above; cleared so the finally block does not dispose it a second time.
            audioInputStream = null;

            var errorAnalysis = AnalyzeAudioProcessingError(ex, audioData);
            _logger.LogError("💡 音頻處理錯誤分析: {Analysis}", errorAnalysis);
            throw new DiagnosedAssessmentException(errorAnalysis, ex);
        }

        // 5. Audio configuration and recognizer.
        using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
        pronunciationConfig.ApplyTo(recognizer);

        // 6. Run recognition and assessment.
        _logger.LogInformation("🎤 開始執行 Azure Speech 語音識別...");
        _logger.LogDebug("📋 發音評估參數: ReferenceText='{Text}', Language={Language}", referenceText, language);
        var result = await recognizer.RecognizeOnceAsync();
        stopwatch.Stop();
        LogRecognitionResult(result);

        // 7. Interpret the outcome.
        switch (result.Reason)
        {
            case ResultReason.RecognizedSpeech:
                return BuildAssessmentResult(result, referenceText, flashcardId, stopwatch.ElapsedMilliseconds);

            case ResultReason.NoMatch:
                _logger.LogWarning("❌ Azure Speech Services 未檢測到語音內容");
                _logger.LogDebug("🔍 NoMatch 詳細資訊: Text='{Text}', Duration={Duration}ms",
                    result.Text ?? "NULL", result.Duration.TotalMilliseconds);
                _logger.LogDebug("📊 音頻數據統計: Size={Size}bytes", audioData.Count);
                throw new DiagnosedAssessmentException("未檢測到語音,可能原因:音頻太短、音量太小、背景噪音太大,或音頻格式不正確。請確保音頻清晰並重新錄製。");

            case ResultReason.Canceled:
                throw BuildCancellationFailure(result);

            default:
                _logger.LogError("❌ 未預期的 Azure Speech Services 結果狀態: {Reason}", result.Reason);
                _logger.LogError("🔍 所有可能的 ResultReason 值:");
                _logger.LogError(" - RecognizedSpeech = {Value}", (int)ResultReason.RecognizedSpeech);
                _logger.LogError(" - NoMatch = {Value}", (int)ResultReason.NoMatch);
                _logger.LogError(" - Canceled = {Value}", (int)ResultReason.Canceled);
                _logger.LogError(" - 實際收到的值 = {ActualValue}", (int)result.Reason);
                throw new DiagnosedAssessmentException($"語音識別失敗,未預期的結果狀態: {result.Reason} (值: {(int)result.Reason})");
        }
    }
    catch (DiagnosedAssessmentException diagnosed)
    {
        // The message is already user-facing; log once with the flashcard id and pass it through untouched.
        _logger.LogError(diagnosed, "❌ 發音評估失敗: FlashcardId={FlashcardId}, Message={Message}",
            flashcardId, diagnosed.Message);
        throw;
    }
    catch (System.IO.IOException ioEx)
    {
        _logger.LogError(ioEx, "❌ 音頻檔案讀取錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("音頻檔案讀取失敗,請檢查檔案是否損壞或重新上傳", ioEx);
    }
    catch (UnauthorizedAccessException authEx)
    {
        _logger.LogError(authEx, "❌ Azure Speech Services 認證錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("Azure Speech Services 認證失敗,請檢查 SubscriptionKey 和 Region 配置", authEx);
    }
    catch (System.Net.WebException webEx)
    {
        _logger.LogError(webEx, "❌ 網路連接錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("無法連接到 Azure Speech Services,請檢查網路連接", webEx);
    }
    catch (TaskCanceledException timeoutEx)
    {
        _logger.LogError(timeoutEx, "❌ 請求超時: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("語音處理超時,請縮短音頻長度或檢查網路速度", timeoutEx);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "❌ 發音評估系統錯誤: FlashcardId={FlashcardId}, ExceptionType={Type}",
            flashcardId, ex.GetType().Name);

        var errorAnalysis = AnalyzeGeneralException(ex);
        _logger.LogError("💡 錯誤分析結果: {Analysis}", errorAnalysis);

        if (ex.InnerException != null)
        {
            _logger.LogError("🔍 內部異常: {InnerExceptionType} - {InnerMessage}",
                ex.InnerException.GetType().Name, ex.InnerException.Message);
        }
        throw new InvalidOperationException($"發音評估失敗: {errorAnalysis}", ex);
    }
    finally
    {
        // Release the push stream on success and failure alike; the recognizer and audio config
        // declared with "using" inside the try have already been disposed by this point.
        if (audioInputStream is not null)
        {
            try
            {
                audioInputStream.Dispose();
            }
            catch (Exception disposeEx)
            {
                _logger.LogWarning(disposeEx, "⚠️ AudioInputStream 清理時發生警告");
            }
        }
    }
}

/// <summary>
/// Failure whose message is already suitable for the caller. It derives from
/// <see cref="InvalidOperationException"/> so existing catch blocks keep working, while letting
/// EvaluatePronunciationAsync tell diagnosed failures apart from unexpected ones.
/// </summary>
private sealed class DiagnosedAssessmentException : InvalidOperationException
{
    public DiagnosedAssessmentException(string message, Exception? innerException = null)
        : base(message, innerException)
    {
    }
}

/// <summary>Writes debug-level statistics about the raw audio bytes (size, header, amplitude).</summary>
/// <param name="audioData">Bytes read from the caller's stream.</param>
private void LogAudioDiagnostics(List<byte> audioData)
{
    _logger.LogDebug("🔊 音頻數據分析:");
    _logger.LogDebug(" - 總大小: {TotalSize} bytes", audioData.Count);
    // 16 kHz * 2 bytes per sample.
    _logger.LogDebug(" - 預估時長: ~{Duration:F1} 秒 (假設 16kHz 16-bit mono)",
        audioData.Count / (16000.0 * 2));

    // Inspect the first four bytes for a known container signature.
    if (audioData.Count >= 4)
    {
        var header = audioData.Take(4).ToArray();
        var headerHex = string.Join(" ", header.Select(b => b.ToString("X2")));
        _logger.LogDebug(" - 檔案頭部: {Header}", headerHex);
        if (header[0] == 0x52 && header[1] == 0x49 && header[2] == 0x46 && header[3] == 0x46)
        {
            _logger.LogDebug(" - 檢測到 WAV 格式 (RIFF header)");
        }
        else if (header[0] == 0xFF && (header[1] & 0xE0) == 0xE0)
        {
            _logger.LogDebug(" - 檢測到 MP3 格式");
        }
        else
        {
            _logger.LogDebug(" - 未識別的音頻格式,可能是 raw PCM 或其他格式");
        }
    }

    // Rough loudness estimate over the first 1000 bytes, read as little-endian 16-bit samples.
    if (audioData.Count > 100)
    {
        int byteLimit = Math.Min(audioData.Count - 1, 1000);
        int sampleCount = 0;
        int maxAmplitude = 0;
        long amplitudeSum = 0;
        for (int i = 0; i < byteLimit; i += 2)
        {
            short sample = (short)(audioData[i] | (audioData[i + 1] << 8));
            // Widen before Abs: Math.Abs(short) throws for -32768.
            int amplitude = Math.Abs((int)sample);
            maxAmplitude = Math.Max(maxAmplitude, amplitude);
            amplitudeSum += amplitude;
            sampleCount++;
        }

        if (sampleCount > 0)
        {
            double avgAmplitude = (double)amplitudeSum / sampleCount;
            _logger.LogDebug(" - 最大振幅: {Max}", maxAmplitude);
            _logger.LogDebug(" - 平均振幅: {Avg:F1}", avgAmplitude);
            if (maxAmplitude < 100)
            {
                _logger.LogWarning("⚠️ 音頻音量可能過低 (最大振幅: {Max})", maxAmplitude);
            }
            else if (avgAmplitude < 10)
            {
                _logger.LogWarning("⚠️ 音頻平均音量過低,可能包含過多靜音");
            }
        }
    }
}

/// <summary>Logs the recognizer's response, including selected service properties at debug level.</summary>
/// <param name="result">Result returned by the recognizer.</param>
private void LogRecognitionResult(SpeechRecognitionResult result)
{
    _logger.LogInformation("📊 Azure Speech Services 回應: Reason={Reason}, Text='{Text}', Duration={Duration}ms",
        result.Reason.ToString(), result.Text ?? "NULL", result.Duration.TotalMilliseconds);

    _logger.LogDebug("🔍 Azure Speech Result 詳細資訊:");
    _logger.LogDebug(" - ResultId: {ResultId}", result.ResultId ?? "NULL");
    _logger.LogDebug(" - Reason: {Reason} ({ReasonValue})", result.Reason.ToString(), (int)result.Reason);
    _logger.LogDebug(" - Text: '{Text}'", result.Text ?? "NULL");
    _logger.LogDebug(" - Duration: {Duration}ms", result.Duration.TotalMilliseconds);

    if (result.Properties == null)
    {
        return;
    }

    _logger.LogDebug("🏷️ Result Properties:");
    var commonProperties = new[]
    {
        PropertyId.SpeechServiceResponse_JsonResult,
        PropertyId.SpeechServiceResponse_RequestDetailedResultTrueFalse,
        PropertyId.SpeechServiceConnection_Endpoint,
        PropertyId.SpeechServiceConnection_Region
    };
    foreach (var propertyId in commonProperties)
    {
        var value = result.Properties.GetProperty(propertyId);
        if (!string.IsNullOrEmpty(value))
        {
            _logger.LogDebug(" - {PropertyName}: {Value}", propertyId.ToString(), value);
        }
    }
}

/// <summary>Converts a successful recognition into the application's result DTO.</summary>
/// <param name="result">Recognition result whose reason is RecognizedSpeech.</param>
/// <param name="referenceText">Text the speaker was expected to say.</param>
/// <param name="flashcardId">Identifier copied into the DTO.</param>
/// <param name="elapsedMilliseconds">Elapsed time measured by the caller.</param>
/// <returns>The populated assessment result, including feedback.</returns>
private PronunciationResult BuildAssessmentResult(
    SpeechRecognitionResult result,
    string referenceText,
    string flashcardId,
    long elapsedMilliseconds)
{
    var pronunciationResult = PronunciationAssessmentResult.FromResult(result);

    var assessmentResult = new PronunciationResult
    {
        AssessmentId = Guid.NewGuid().ToString(),
        FlashcardId = flashcardId,
        ReferenceText = referenceText,
        TranscribedText = result.Text,
        Scores = new PronunciationScores
        {
            Overall = pronunciationResult.AccuracyScore,
            Accuracy = pronunciationResult.AccuracyScore,
            Fluency = pronunciationResult.FluencyScore,
            Completeness = pronunciationResult.CompletenessScore,
            Prosody = pronunciationResult.ProsodyScore
        },
        ProcessingTime = elapsedMilliseconds,
        ConfidenceLevel = MapScoreToConfidence(pronunciationResult.AccuracyScore)
    };

    // Word-level results are only present when the service returned them.
    if (pronunciationResult.Words != null)
    {
        assessmentResult.WordLevelResults = [.. pronunciationResult.Words
            .Select(word => new WordLevelResult
            {
                Word = word.Word,
                AccuracyScore = word.AccuracyScore,
                ErrorType = word.ErrorType.ToString()
            })];
    }

    assessmentResult.Feedback = GenerateFeedback(assessmentResult.Scores, assessmentResult.WordLevelResults);
    _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
        pronunciationResult.AccuracyScore, elapsedMilliseconds);
    return assessmentResult;
}

/// <summary>Logs a cancelled recognition and builds the exception describing it.</summary>
/// <param name="result">Recognition result whose reason is Canceled.</param>
/// <returns>The exception to throw; its message includes the suggested remedy.</returns>
private DiagnosedAssessmentException BuildCancellationFailure(SpeechRecognitionResult result)
{
    var cancellation = CancellationDetails.FromResult(result);
    _logger.LogError("❌ Azure Speech Services 處理被取消");
    _logger.LogError("🔍 取消詳細資訊:");
    _logger.LogError(" - Reason: {Reason}", cancellation.Reason);
    _logger.LogError(" - ErrorCode: {ErrorCode}", cancellation.ErrorCode);
    _logger.LogError(" - ErrorDetails: {ErrorDetails}", cancellation.ErrorDetails ?? "NULL");

    var errorAnalysis = AnalyzeAzureErrorCode(cancellation.ErrorCode.ToString());
    _logger.LogError("💡 錯誤分析: {Analysis}", errorAnalysis);

    var message = cancellation.Reason == CancellationReason.Error
        ? $"語音識別錯誤: {cancellation.ErrorDetails} (ErrorCode: {cancellation.ErrorCode})\n建議解決方案: {errorAnalysis}"
        : $"語音識別被取消: {cancellation.Reason},請檢查音頻格式或網路連接\n建議解決方案: {errorAnalysis}";
    return new DiagnosedAssessmentException(message);
}
/// <summary>
/// Reports whether the service looks usable: a subscription key is configured and a speech
/// configuration can be created from the key and region. No network request is made here.
/// </summary>
/// <returns>
/// A completed task holding <c>true</c> when the configuration could be created with a non-empty
/// region; <c>false</c> when the key is missing or creating the configuration throws.
/// </returns>
public Task<bool> IsServiceAvailableAsync()
{
    // Nothing is awaited, so the method returns completed tasks instead of being marked async.
    try
    {
        if (string.IsNullOrEmpty(_options.SubscriptionKey))
        {
            _logger.LogWarning("Azure Speech Services 未配置");
            return Task.FromResult(false);
        }

        // Simple availability check: the configuration object can be built.
        var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
        return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
        return Task.FromResult(false);
    }
}
/// <summary>Buckets a 0-100 score into a confidence level.</summary>
/// <param name="overallScore">Score to classify.</param>
/// <returns>2 for scores of 85 or more, 1 for scores of 70 or more, otherwise 0.</returns>
private static int MapScoreToConfidence(double overallScore)
{
    // Excellent (high confidence).
    if (overallScore >= 85)
    {
        return 2;
    }

    // Good (medium confidence).
    if (overallScore >= 70)
    {
        return 1;
    }

    // Needs improvement (low confidence).
    return 0;
}
/// <summary>Builds the list of feedback messages shown to the learner.</summary>
/// <param name="scores">Aggregate scores of the assessment.</param>
/// <param name="wordResults">
/// Per-word results; may be null or empty when no word-level data is available, in which case
/// no word-specific advice is produced.
/// </param>
/// <returns>One overall verdict followed by zero or more specific suggestions.</returns>
private static List<string> GenerateFeedback(PronunciationScores scores, List<WordLevelResult>? wordResults)
{
    var feedback = new List<string>();

    // Overall verdict.
    if (scores.Overall >= 90)
    {
        feedback.Add("🎉 發音表現優秀!");
    }
    else if (scores.Overall >= 80)
    {
        feedback.Add("👍 發音表現良好");
    }
    else if (scores.Overall >= 70)
    {
        feedback.Add("📈 發音有進步空間");
    }
    else
    {
        feedback.Add("💪 建議多加練習發音");
    }

    // Targeted suggestions per score dimension.
    if (scores.Accuracy < 70)
    {
        feedback.Add("注意發音準確度,可以多聽標準發音範例");
    }
    if (scores.Fluency < 70)
    {
        feedback.Add("嘗試讓語速更自然流暢");
    }
    if (scores.Prosody < 70)
    {
        feedback.Add("注意語調和重音的掌握");
    }

    // Word-level advice: name at most three weak words. The caller only assigns the word list
    // when the service returned words, so a missing list must not crash feedback generation.
    if (wordResults is { Count: > 0 })
    {
        var problemWords = wordResults.Where(w => w.AccuracyScore < 70).Take(3).ToList();
        if (problemWords.Count > 0)
        {
            var wordList = string.Join("、", problemWords.Select(w => $"'{w.Word}'"));
            feedback.Add($"重點練習: {wordList}");
        }
    }

    return feedback;
}
/// <summary>
/// Outcome of validating raw audio bytes.
/// </summary>
public class AudioValidationResult
{
    /// <summary>True only when no error and no warning was recorded.</summary>
    public bool IsValid { get; set; }

    /// <summary>True when at least one problem makes the audio unusable.</summary>
    public bool HasCriticalErrors { get; set; }

    /// <summary>Messages for problems that make the audio unusable.</summary>
    public List<string> Errors { get; set; } = new();

    /// <summary>Messages for problems that may reduce recognition quality.</summary>
    public List<string> Warnings { get; set; } = new();
}

/// <summary>
/// Checks the size, container signature and loudness of the audio bytes.
/// </summary>
/// <remarks>
/// Loudness is estimated by reading the bytes as little-endian 16-bit samples. Volume warnings are
/// based on the first 1000 samples; the "completely silent" error is only raised when every sample
/// in the buffer is zero, so a recording that merely starts with silence is not rejected.
/// </remarks>
/// <param name="audioData">Audio bytes to inspect.</param>
/// <returns>The validation result; warnings and errors both set IsValid to false.</returns>
private static AudioValidationResult ValidateAudioData(List<byte> audioData)
{
    const int CriticalMinimumBytes = 100;
    const int RecommendedMinimumBytes = 1000;
    const int MaximumBytes = 10 * 1024 * 1024; // 10 MB
    const int SampleWindow = 1000;
    const int LowVolumePeak = 100;
    const double LowVolumeAverage = 10;

    var result = new AudioValidationResult { IsValid = true };
    int byteCount = audioData.Count;

    void AddError(string message)
    {
        result.Errors.Add(message);
        result.HasCriticalErrors = true;
        result.IsValid = false;
    }

    void AddWarning(string message)
    {
        result.Warnings.Add(message);
        result.IsValid = false;
    }

    short SampleAt(int sampleIndex) =>
        (short)(audioData[sampleIndex * 2] | (audioData[sampleIndex * 2 + 1] << 8));

    // Size checks.
    if (byteCount == 0)
    {
        AddError("音頻數據為空");
    }
    else if (byteCount < CriticalMinimumBytes)
    {
        AddError($"音頻數據過小({byteCount} bytes),可能是無效數據");
    }
    else if (byteCount < RecommendedMinimumBytes)
    {
        AddWarning($"音頻數據較小({byteCount} bytes),建議錄音時間至少 1 秒");
    }

    if (byteCount > MaximumBytes)
    {
        // Floating-point division so the one-decimal figure is meaningful.
        AddError($"音頻檔案過大({byteCount / (1024.0 * 1024.0):F1}MB),請縮短錄音時間");
    }

    // Container signature check on the first four bytes.
    if (byteCount >= 4)
    {
        byte b0 = audioData[0], b1 = audioData[1], b2 = audioData[2], b3 = audioData[3];
        bool isWav = b0 == 0x52 && b1 == 0x49 && b2 == 0x46 && b3 == 0x46;   // "RIFF"
        bool isMp3 = b0 == 0xFF && (b1 & 0xE0) == 0xE0;                       // MPEG frame sync
        bool isOgg = b0 == 0x4F && b1 == 0x67 && b2 == 0x67 && b3 == 0x53;   // "OggS"
        bool isWebM = b0 == 0x1A && b1 == 0x45 && b2 == 0xDF && b3 == 0xA3;  // EBML header
        if (!(isWav || isMp3 || isOgg || isWebM))
        {
            AddWarning("無法識別音頻格式,建議使用 WAV 格式");
        }
    }

    // Simple loudness check.
    if (byteCount > CriticalMinimumBytes)
    {
        int totalSamples = byteCount / 2;
        int windowSamples = Math.Min(SampleWindow, totalSamples);
        int windowPeak = 0;
        long windowSum = 0;
        for (int s = 0; s < windowSamples; s++)
        {
            // Widen before Abs: Math.Abs(short) throws for -32768.
            int amplitude = Math.Abs((int)SampleAt(s));
            windowPeak = Math.Max(windowPeak, amplitude);
            windowSum += amplitude;
        }

        double windowAverage = (double)windowSum / windowSamples;
        if (windowPeak < LowVolumePeak)
        {
            AddWarning($"音頻音量過低(最大振幅: {windowPeak}),可能影響識別準確度");
        }
        if (windowAverage < LowVolumeAverage)
        {
            AddWarning("音頻包含過多靜音,建議重新錄製");
        }

        // Only an entirely silent buffer is a critical error; scan past the window before deciding.
        if (windowPeak == 0)
        {
            bool entirelySilent = true;
            for (int s = windowSamples; s < totalSamples; s++)
            {
                if (SampleAt(s) != 0)
                {
                    entirelySilent = false;
                    break;
                }
            }

            if (entirelySilent)
            {
                AddError("音頻為完全靜音,請檢查麥克風設定");
            }
        }
    }

    return result;
}
/// <summary>
/// Disposes the audio input stream without letting a cleanup failure escape.
/// </summary>
/// <remarks>
/// No garbage collection is forced here: disposing the stream is the cleanup this method performs,
/// and the runtime is left to schedule collections itself.
/// </remarks>
/// <param name="audioInputStream">Stream to dispose; null is ignored.</param>
/// <param name="context">Short description of why cleanup is running, used only for logging.</param>
private void SafeCleanupResources(AudioInputStream? audioInputStream, string context)
{
    _logger.LogDebug("🧹 開始清理資源 - 上下文: {Context}", context);

    if (audioInputStream is null)
    {
        return;
    }

    try
    {
        audioInputStream.Dispose();
        _logger.LogDebug("✅ AudioInputStream 已安全釋放");
    }
    catch (Exception cleanupEx)
    {
        // Best effort: a failed dispose is reported but never masks the original failure.
        _logger.LogWarning(cleanupEx, "⚠️ AudioInputStream 清理時發生警告");
    }
}
/// <summary>
/// Applies the recovery steps to a copy of the audio, in order: silence trimming,
/// volume normalization, then padding to the minimum length.
/// </summary>
/// <param name="originalAudioData">Audio bytes as received; this list is not modified.</param>
/// <returns>The audio after all three steps.</returns>
private static List<byte> AttemptAudioRecovery(List<byte> originalAudioData)
{
    var workingCopy = new List<byte>(originalAudioData);

    // Step 1: trim silence. Step 2: normalize volume. Step 3: enforce the minimum length.
    var trimmed = RemoveSilence(workingCopy);
    var normalized = NormalizeVolume(trimmed);
    return EnsureMinimumLength(normalized);
}
/// <summary>
/// Trims leading and trailing silence from audio read as little-endian 16-bit samples.
/// </summary>
/// <remarks>
/// A sample counts as audible when its absolute value exceeds 100. Ten samples of padding are kept
/// on each side of the audible region. Inputs shorter than 100 bytes, and inputs with no audible
/// sample at all, are returned unchanged.
/// </remarks>
/// <param name="audioData">Audio bytes to trim; not modified.</param>
/// <returns>The input list itself when nothing can be trimmed safely, otherwise a new trimmed list.</returns>
private static List<byte> RemoveSilence(List<byte> audioData)
{
    const int MinimumBytes = 100;
    const int SilenceThreshold = 100;
    const int PaddingSamples = 10;

    if (audioData.Count < MinimumBytes)
    {
        return audioData;
    }

    // A trailing odd byte is not a complete sample and is ignored.
    int sampleCount = audioData.Count / 2;

    bool IsAudible(int sampleIndex)
    {
        short sample = (short)(audioData[sampleIndex * 2] | (audioData[sampleIndex * 2 + 1] << 8));
        // Widen before Abs: Math.Abs(short) throws for -32768.
        return Math.Abs((int)sample) > SilenceThreshold;
    }

    int firstAudible = 0;
    while (firstAudible < sampleCount && !IsAudible(firstAudible))
    {
        firstAudible++;
    }

    // No audible sample at all: there is nothing meaningful to trim.
    if (firstAudible == sampleCount)
    {
        return audioData;
    }

    // Terminates at firstAudible at the latest, because that sample is audible.
    int lastAudible = sampleCount - 1;
    while (!IsAudible(lastAudible))
    {
        lastAudible--;
    }

    int start = Math.Max(0, firstAudible - PaddingSamples);
    int end = Math.Min(sampleCount - 1, lastAudible + PaddingSamples);
    if (start >= end)
    {
        return audioData;
    }

    // Copy the retained samples straight from the source bytes.
    return audioData.GetRange(start * 2, (end - start + 1) * 2);
}
/// <summary>
/// Amplifies quiet audio, read as little-endian 16-bit samples, by a bounded factor.
/// </summary>
/// <remarks>
/// Amplification only happens when the peak amplitude is above 0 and below 1000. The gain is
/// 1000 / peak, capped at 3. Inputs shorter than 100 bytes are returned unchanged.
/// </remarks>
/// <param name="audioData">Audio bytes to normalize; not modified.</param>
/// <returns>The input list itself when no gain is applied, otherwise a new amplified list.</returns>
private static List<byte> NormalizeVolume(List<byte> audioData)
{
    const int MinimumBytes = 100;
    const int QuietPeak = 1000;
    const double MaxGain = 3.0;

    if (audioData.Count < MinimumBytes)
    {
        return audioData;
    }

    // Decode the samples once and track the peak while doing so.
    int sampleCount = audioData.Count / 2;
    var samples = new short[sampleCount];
    int peak = 0;
    for (int i = 0; i < sampleCount; i++)
    {
        samples[i] = (short)(audioData[i * 2] | (audioData[i * 2 + 1] << 8));
        // Widen before Abs: Math.Abs(short) throws for -32768.
        peak = Math.Max(peak, Math.Abs((int)samples[i]));
    }

    // Silent or already loud enough: leave the audio alone.
    if (peak == 0 || peak >= QuietPeak)
    {
        return audioData;
    }

    double gain = Math.Min(MaxGain, (double)QuietPeak / peak);
    var amplified = new List<byte>(sampleCount * 2);
    foreach (var sample in samples)
    {
        // Clamp to the 16-bit range before narrowing.
        var scaled = (short)Math.Max(-32768, Math.Min(32767, sample * gain));
        amplified.Add((byte)(scaled & 0xFF));
        amplified.Add((byte)((scaled >> 8) & 0xFF));
    }

    return amplified;
}
/// <summary>
/// Pads audio shorter than 1000 bytes with trailing zero bytes until it reaches that length.
/// </summary>
/// <param name="audioData">Audio bytes to check; not modified.</param>
/// <returns>The input list itself when it is long enough, otherwise a new zero-padded list.</returns>
private static List<byte> EnsureMinimumLength(List<byte> audioData)
{
    const int requiredBytes = 1000; // at least 1 KB

    int shortfall = requiredBytes - audioData.Count;
    if (shortfall <= 0)
    {
        return audioData;
    }

    // Copy the audio, then append the missing amount of silence (zero bytes) in one go.
    var padded = new List<byte>(audioData);
    padded.AddRange(new byte[shortfall]);
    return padded;
}
/// <summary>
/// Checks that the subscription key and region are present and plausible.
/// </summary>
/// <remarks>
/// A missing or too-short key and a missing region are errors. A region that matches none of the
/// known names only produces a warning log entry.
/// </remarks>
/// <returns>Whether the configuration is usable, together with the list of error messages.</returns>
private (bool IsValid, List<string> Errors) ValidateAzureConfiguration()
{
    var problems = new List<string>();

    // Subscription key: present and at least 32 characters long.
    if (string.IsNullOrWhiteSpace(_options.SubscriptionKey))
    {
        problems.Add("SubscriptionKey 未設定");
    }
    else if (_options.SubscriptionKey.Length < 32)
    {
        problems.Add($"SubscriptionKey 長度異常 (實際: {_options.SubscriptionKey.Length}, 期望: >=32)");
    }

    // Region: present; an unfamiliar name is only worth a warning.
    if (string.IsNullOrWhiteSpace(_options.Region))
    {
        problems.Add("Region 未設定");
    }
    else
    {
        string[] knownRegionFragments =
        {
            "eastus", "westus", "eastasia", "southeastasia", "northeurope", "westeurope"
        };
        var normalizedRegion = _options.Region.ToLowerInvariant();
        bool looksFamiliar = false;
        foreach (var fragment in knownRegionFragments)
        {
            if (normalizedRegion.Contains(fragment))
            {
                looksFamiliar = true;
                break;
            }
        }

        if (!looksFamiliar)
        {
            _logger.LogWarning("⚠️ Region '{Region}' 可能不是標準的 Azure Region 格式", _options.Region);
        }
    }

    // Record the configuration state (key length only, never the key itself).
    var keyStatus = string.IsNullOrWhiteSpace(_options.SubscriptionKey)
        ? "未設定"
        : $"已設定 (長度: {_options.SubscriptionKey.Length})";
    _logger.LogDebug("🔧 Azure 配置驗證結果:");
    _logger.LogDebug(" - SubscriptionKey: {Status}", keyStatus);
    _logger.LogDebug(" - Region: {Region}", _options.Region ?? "未設定");

    return (problems.Count == 0, problems);
}
/// <summary>
/// Turns an audio-processing exception into a user-facing explanation with a suggested remedy.
/// </summary>
/// <remarks>
/// The exception type is checked first, using type tests so derived types (for example
/// FileNotFoundException or ArgumentNullException) are classified with their base type.
/// If the type is not recognised, the lower-cased message is searched for keywords, and finally
/// the amount of audio read is used to pick a fallback.
/// </remarks>
/// <param name="ex">Exception raised while reading or pushing the audio.</param>
/// <param name="audioData">Audio bytes read so far; null is treated as zero bytes.</param>
/// <returns>The explanation text.</returns>
private static string AnalyzeAudioProcessingError(Exception ex, List<byte>? audioData)
{
    var message = ex.Message.ToLowerInvariant();
    int audioSize = audioData?.Count ?? 0;

    // 1. Classify by exception type.
    if (ex is OutOfMemoryException)
    {
        return "音頻檔案過大,超出系統記憶體限制。建議:縮短錄音時間或降低音質";
    }
    if (ex is ArgumentException)
    {
        return message.Contains("audio") || message.Contains("format")
            ? "音頻格式參數錯誤。建議:使用 WAV 格式16kHz, 16-bit, mono"
            : "音頻數據參數錯誤。建議:檢查音頻檔案是否完整";
    }
    if (ex is InvalidOperationException)
    {
        return message.Contains("stream") || message.Contains("closed")
            ? "音頻流狀態異常。建議:重新上傳音頻檔案"
            : "音頻處理操作無效。建議:檢查音頻檔案格式和完整性";
    }
    if (ex is System.IO.IOException)
    {
        return "音頻檔案讀取失敗。建議:檢查檔案是否損壞或被其他程序佔用";
    }
    if (ex is UnauthorizedAccessException)
    {
        return "音頻檔案存取權限不足。建議:檢查檔案權限設定";
    }
    if (ex is NotSupportedException)
    {
        return "音頻格式不被支援。建議:使用 WAV、MP3 或 WebM 格式";
    }

    // 2. Classify by message content.
    if (message.Contains("format") || message.Contains("encoding"))
    {
        return "音頻編碼格式錯誤。建議:轉換為 WAV 格式16kHz, 16-bit, mono";
    }
    if (message.Contains("empty") || message.Contains("null"))
    {
        return "音頻數據為空。建議:重新錄製音頻或檢查上傳過程";
    }
    if (message.Contains("size") || message.Contains("length"))
    {
        if (audioSize == 0)
        {
            return "音頻檔案為空。建議:重新錄製音頻";
        }
        if (audioSize < 1000)
        {
            return $"音頻檔案過小({audioSize} bytes)。建議延長錄音時間至少 1 秒";
        }
        if (audioSize > 10 * 1024 * 1024)
        {
            // Floating-point division so the one-decimal figure is meaningful.
            return $"音頻檔案過大({audioSize / (1024.0 * 1024.0):F1}MB)。建議縮短錄音時間或降低音質";
        }
        // A size-related message with a plausible size falls through to the checks below.
    }
    if (message.Contains("timeout") || message.Contains("time"))
    {
        return "音頻處理超時。建議:縮短音頻長度或檢查網路連接";
    }

    // 3. Fall back on how much audio was read.
    if (audioSize == 0)
    {
        return "音頻處理失敗:無音頻數據。建議:重新錄製音頻並確保麥克風正常工作";
    }
    if (audioSize < 100)
    {
        return $"音頻處理失敗:音頻數據異常小({audioSize} bytes)。建議檢查錄音設備或重新錄製";
    }

    // Default advice.
    return $"音頻處理失敗({ex.GetType().Name})。建議:使用 WAV 格式重新錄製,確保音頻清晰且時長 1-30 秒";
}
/// <summary>
/// Maps an arbitrary exception to a user-facing explanation by scanning its lower-cased message
/// for known keywords. Rules are evaluated in order and the first match wins.
/// </summary>
/// <param name="ex">Exception to analyze.</param>
/// <returns>The explanation for the first matching rule, or a generic fallback.</returns>
private static string AnalyzeGeneralException(Exception ex)
{
    var normalized = ex.Message.ToLowerInvariant();

    // Ordered "any of these fragments" rules; earlier entries take precedence.
    (string[] Fragments, string Advice)[] orderedRules =
    {
        (new[] { "error code: 0x5", "unauthorized", "forbidden" },
            "Azure Speech Services 認證失敗 - 檢查 SubscriptionKey 和 Region 配置"),
        (new[] { "error code: 0x6", "audio format", "unsupported" },
            "音頻格式不支援 - 使用 WAV 格式16kHz, 16-bit, mono"),
        (new[] { "error code: 0x7", "network", "connection" },
            "網路連接問題 - 檢查網路連接或防火牆設定"),
        (new[] { "timeout", "timed out" },
            "請求超時 - 縮短音頻長度或檢查網路速度"),
        (new[] { "quota", "limit", "throttle" },
            "配額超限或請求過於頻繁 - 稍後再試或升級服務方案"),
        (new[] { "region", "endpoint" },
            "Region 配置錯誤 - 檢查 Azure Region 是否正確"),
    };

    foreach (var (fragments, advice) in orderedRules)
    {
        if (Array.Exists(fragments, fragment => normalized.Contains(fragment)))
        {
            return advice;
        }
    }

    // Last specific rule: the message must mention audio AND describe it as empty or invalid.
    bool mentionsAudio = normalized.Contains("audio");
    bool describesBadData = normalized.Contains("empty") || normalized.Contains("invalid");
    if (mentionsAudio && describesBadData)
    {
        return "音頻數據無效 - 重新錄製音頻或檢查音頻格式";
    }

    // Default advice.
    return "系統錯誤 - 檢查網路連接、音頻格式和 Azure 配置,如問題持續請聯繫技術支援";
}
/// <summary>
/// Maps an Azure Speech error code to a user-facing explanation with a suggested remedy.
/// </summary>
/// <remarks>
/// The caller in this file passes the text of the cancellation error code enum, so the
/// Speech SDK's own member names are covered alongside the HTTP-style names and the
/// hexadecimal codes. Matching is exact and case-sensitive.
/// </remarks>
/// <param name="errorCode">Error code text.</param>
/// <returns>The explanation, or a generic message quoting the unknown code.</returns>
private static string AnalyzeAzureErrorCode(string errorCode)
{
    return errorCode switch
    {
        // Member names of the Speech SDK cancellation error code enum.
        "AuthenticationFailure" => "認證失敗 - 檢查 Azure Speech Services API Key 是否正確配置",
        "ConnectionFailure" => "網路連接問題 - 檢查網路連接或防火牆設定",
        "ServiceTimeout" => "超時錯誤 - 縮短音頻長度或檢查網路速度",
        "ServiceError" => "Azure 服務內部錯誤 - 稍後再試",
        "RuntimeError" => "Speech SDK 執行錯誤 - 檢查音頻格式或稍後再試",

        // HTTP-style names (some of these are also SDK enum members).
        "BadRequest" => "請求格式不正確 - 檢查音頻格式是否為支援的格式WAV、WebM、MP3",
        "Unauthorized" => "認證失敗 - 檢查 Azure Speech Services API Key 是否正確配置",
        "Forbidden" => "權限不足 - 檢查 Azure 訂閱是否啟用 Speech Services",
        "NotFound" => "找不到資源 - 檢查 Azure Region 是否正確",
        "TooManyRequests" => "請求過於頻繁 - 稍後再試或升級服務方案",
        "InternalServerError" => "Azure 服務內部錯誤 - 稍後再試",
        "ServiceUnavailable" => "服務暫時不可用 - 檢查網路連接或稍後再試",

        // Hexadecimal codes.
        "0x5" => "認證錯誤 - 檢查 SubscriptionKey 和 Region 配置",
        "0x6" => "音頻格式不支援 - 使用 WAV 格式16kHz, 16-bit, mono",
        "0x7" => "網路連接問題 - 檢查網路連接或防火牆設定",
        "0x8" => "音頻數據損壞 - 重新錄製音頻",
        "0x9" => "超時錯誤 - 縮短音頻長度或檢查網路速度",
        "0xa" => "配額超限 - 檢查 Azure 服務使用量",

        _ => $"未知錯誤碼 '{errorCode}' - 檢查網路連接、音頻格式和 Azure 配置"
    };
}
}