1021 lines
42 KiB
C#
1021 lines
42 KiB
C#
using Microsoft.CognitiveServices.Speech;
|
||
using Microsoft.CognitiveServices.Speech.Audio;
|
||
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
|
||
using Microsoft.Extensions.Options;
|
||
using System.Diagnostics;
|
||
using DramaLing.Api.Models.Configuration;
|
||
using DramaLing.Api.Models.DTOs;
|
||
using DramaLing.Api.Contracts.Services.Speech;
|
||
|
||
namespace DramaLing.Api.Services.Speech;
|
||
|
||
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
|
||
{
|
||
private readonly AzureSpeechOptions _options;
|
||
private readonly ILogger<AzurePronunciationAssessmentService> _logger;
|
||
|
||
/// <summary>
/// Creates the service from the bound <see cref="AzureSpeechOptions"/> and logs a
/// summary of the loaded configuration.
/// </summary>
/// <param name="options">Options wrapper; its <c>Value</c> is stored for later calls.</param>
/// <param name="logger">Logger used for every diagnostic emitted by this service.</param>
public AzurePronunciationAssessmentService(
    IOptions<AzureSpeechOptions> options,
    ILogger<AzurePronunciationAssessmentService> logger)
{
    _options = options.Value;
    _logger = logger;

    // Only the key length is logged. Prefix/suffix fragments of the key are secret
    // material and must not end up in log sinks.
    var keyLength = string.IsNullOrEmpty(_options.SubscriptionKey) ? 0 : _options.SubscriptionKey.Length;

    _logger.LogInformation("🔍 Azure Speech Services 配置載入詳情:");
    _logger.LogInformation(" Region: {Region}", _options.Region);
    _logger.LogInformation(" KeyLength: {KeyLength}", keyLength);
    _logger.LogInformation(" EnableDetailedResult: {EnableDetailedResult}", _options.EnableDetailedResult);
    _logger.LogInformation(" TimeoutSeconds: {TimeoutSeconds}", _options.TimeoutSeconds);

    // Report whether an environment variable also supplies a key, without echoing any of it.
    var envKey = Environment.GetEnvironmentVariable("AzureSpeech__SubscriptionKey");
    if (!string.IsNullOrEmpty(envKey))
    {
        _logger.LogWarning("⚠️ 發現環境變數 AzureSpeech__SubscriptionKey (長度: {EnvKeyLength})", envKey.Length);
    }

    if (string.IsNullOrEmpty(_options.SubscriptionKey))
    {
        _logger.LogError("⚠️ Azure Speech Services SubscriptionKey 為空!請檢查 User Secrets 配置");
    }
    else if (!_options.SubscriptionKey.StartsWith("AKV", StringComparison.Ordinal))
    {
        // Ordinal comparison: the key is an opaque identifier, not linguistic text.
        _logger.LogWarning("⚠️ SubscriptionKey 格式看起來不正確,期望以 'AKV' 開頭");
    }
}
|
||
|
||
/// <summary>
/// Runs an Azure pronunciation assessment of <paramref name="audioStream"/> against
/// <paramref name="referenceText"/> and maps the SDK result onto a <see cref="PronunciationResult"/>.
/// </summary>
/// <param name="audioStream">Recorded audio; rewound first when seekable, then read to the end.</param>
/// <param name="referenceText">Text the speaker was expected to say.</param>
/// <param name="flashcardId">Identifier copied onto the result and used in log messages.</param>
/// <param name="language">Recognition language passed to the speech configuration.</param>
/// <returns>Scores, word-level results and feedback for the recording.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown for every failure path (configuration, audio handling, no match, cancellation, SDK errors).
/// The original exception, when there is one, is attached as <c>InnerException</c>.
/// </exception>
public async Task<PronunciationResult> EvaluatePronunciationAsync(
    Stream audioStream,
    string referenceText,
    string flashcardId,
    string language = "en-US")
{
    var stopwatch = Stopwatch.StartNew();

    // NOTE(review): every InvalidOperationException raised inside this try block, including the
    // detailed NoMatch/Canceled messages, is caught again by the final catch (Exception) and its
    // message is replaced by the AnalyzeGeneralException text. That message flow is preserved
    // here as-is; confirm whether callers are meant to see the detailed messages instead.
    try
    {
        _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}", flashcardId, language);

        // 1. Validate the Azure configuration before touching the SDK.
        var configValidation = ValidateAzureConfiguration();
        if (!configValidation.IsValid)
        {
            var configErrors = string.Join(", ", configValidation.Errors);
            _logger.LogError("❌ Azure Speech Services 配置驗證失敗: {Errors}", configErrors);
            throw new InvalidOperationException($"Azure 配置錯誤: {configErrors}");
        }

        _logger.LogInformation("✅ Azure Speech Services 配置驗證通過");

        // 2. Speech configuration and pronunciation-assessment parameters.
        var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
        speechConfig.SpeechRecognitionLanguage = language;

        var pronunciationConfig = new PronunciationAssessmentConfig(
            referenceText,
            GradingSystem.HundredMark,
            Granularity.Word,
            enableMiscue: true);

        // 3. Read, validate and push the audio.
        // NOTE(review): the bytes are pushed as received (any container header included) into a
        // stream declared as 16 kHz / 16-bit / mono PCM; confirm the caller supplies raw PCM or WAV.
        var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
        PushAudioInputStream? pushStream = null;
        var audioData = new List<byte>(); // declared outside the try so the catch can report its size

        try
        {
            pushStream = AudioInputStream.CreatePushStream(audioFormat);
            _logger.LogDebug("✅ AudioInputStream 已建立");

            if (audioStream.CanSeek)
            {
                audioStream.Position = 0;
                _logger.LogDebug("音頻流大小: {Size} bytes", audioStream.Length);
            }

            // Append each chunk in one call instead of byte by byte.
            var buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = await audioStream.ReadAsync(buffer.AsMemory(0, buffer.Length))) > 0)
            {
                audioData.AddRange(new ArraySegment<byte>(buffer, 0, bytesRead));
            }

            _logger.LogInformation("成功讀取音頻數據: {Size} bytes", audioData.Count);

            if (audioData.Count == 0)
            {
                throw new InvalidOperationException("音頻數據為空,請重新錄製音頻並確保麥克風正常工作");
            }

            var validationResult = ValidateAudioData(audioData);
            if (!validationResult.IsValid)
            {
                _logger.LogWarning("⚠️ 音頻數據驗證警告: {Warnings}", string.Join(", ", validationResult.Warnings));

                if (validationResult.HasCriticalErrors)
                {
                    throw new InvalidOperationException($"音頻數據驗證失敗: {string.Join(", ", validationResult.Errors)}");
                }
            }

            if (audioData.Count < 1000)
            {
                _logger.LogWarning("音頻數據過小,可能無效: {Size} bytes", audioData.Count);
            }

            LogAudioDiagnostics(audioData);

            // Non-critical validation problems trigger a best-effort clean-up of the samples.
            var processedAudioData = audioData;
            if (!validationResult.IsValid && !validationResult.HasCriticalErrors)
            {
                _logger.LogInformation("🔧 嘗試音頻數據恢復策略...");
                processedAudioData = AttemptAudioRecovery(audioData);

                if (processedAudioData.Count != audioData.Count)
                {
                    _logger.LogInformation("✅ 音頻數據已通過恢復策略處理: {OriginalSize} -> {ProcessedSize} bytes",
                        audioData.Count, processedAudioData.Count);
                }
            }

            var audioBytes = processedAudioData.ToArray();
            pushStream.Write(audioBytes, audioBytes.Length);
            pushStream.Close(); // signals end of stream to the recognizer

            _logger.LogInformation("音頻數據已傳送到 Azure Speech Services");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "❌ 音頻流處理失敗: ExceptionType={Type}, Message={Message}",
                ex.GetType().Name, ex.Message);

            _logger.LogError("🔍 音頻處理失敗時的狀態:");
            _logger.LogError(" - 音頻數據大小: {Size} bytes", audioData.Count);
            _logger.LogError(" - AudioInputStream 狀態: {Status}", pushStream != null ? "已建立" : "未建立");

            SafeCleanupResources(pushStream, "音頻流處理失敗");

            var errorAnalysis = AnalyzeAudioProcessingError(ex, audioData);
            _logger.LogError("💡 音頻處理錯誤分析: {Analysis}", errorAnalysis);

            throw new InvalidOperationException(errorAnalysis, ex);
        }

        // 4-5. Using declarations dispose in reverse order: recognizer, audio config, then the
        // push stream, which was previously never disposed on the success path.
        using var recognitionStream = pushStream;
        using var audioConfig = AudioConfig.FromStreamInput(recognitionStream);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
        pronunciationConfig.ApplyTo(recognizer);

        // 6. Recognize once and assess.
        _logger.LogInformation("🎤 開始執行 Azure Speech 語音識別...");
        _logger.LogDebug("📋 發音評估參數: ReferenceText='{Text}', Language={Language}", referenceText, language);

        var result = await recognizer.RecognizeOnceAsync();
        stopwatch.Stop();

        LogRecognitionResult(result);

        // 7. Map the outcome.
        switch (result.Reason)
        {
            case ResultReason.RecognizedSpeech:
                return BuildAssessmentResult(result, referenceText, flashcardId, stopwatch.ElapsedMilliseconds);

            case ResultReason.NoMatch:
                _logger.LogWarning("❌ Azure Speech Services 未檢測到語音內容");
                _logger.LogDebug("🔍 NoMatch 詳細資訊: Text='{Text}', Duration={Duration}ms",
                    result.Text ?? "NULL", result.Duration.TotalMilliseconds);
                _logger.LogDebug("📊 音頻數據統計: Size={Size}bytes", audioData.Count);

                throw new InvalidOperationException("未檢測到語音,可能原因:音頻太短、音量太小、背景噪音太大,或音頻格式不正確。請確保音頻清晰並重新錄製。");

            case ResultReason.Canceled:
                throw BuildCancellationException(result);

            default:
                _logger.LogError("❌ 未預期的 Azure Speech Services 結果狀態: {Reason}", result.Reason);
                _logger.LogError("🔍 所有可能的 ResultReason 值:");
                _logger.LogError(" - RecognizedSpeech = {Value}", (int)ResultReason.RecognizedSpeech);
                _logger.LogError(" - NoMatch = {Value}", (int)ResultReason.NoMatch);
                _logger.LogError(" - Canceled = {Value}", (int)ResultReason.Canceled);
                _logger.LogError(" - 實際收到的值 = {ActualValue}", (int)result.Reason);

                throw new InvalidOperationException($"語音識別失敗,未預期的結果狀態: {result.Reason} (值: {(int)result.Reason})");
        }
    }
    catch (System.IO.IOException ioEx)
    {
        _logger.LogError(ioEx, "❌ 音頻檔案讀取錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("音頻檔案讀取失敗,請檢查檔案是否損壞或重新上傳", ioEx);
    }
    catch (UnauthorizedAccessException authEx)
    {
        _logger.LogError(authEx, "❌ Azure Speech Services 認證錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("Azure Speech Services 認證失敗,請檢查 SubscriptionKey 和 Region 配置", authEx);
    }
    catch (System.Net.WebException webEx)
    {
        _logger.LogError(webEx, "❌ 網路連接錯誤: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("無法連接到 Azure Speech Services,請檢查網路連接", webEx);
    }
    catch (TaskCanceledException timeoutEx)
    {
        _logger.LogError(timeoutEx, "❌ 請求超時: FlashcardId={FlashcardId}", flashcardId);
        throw new InvalidOperationException("語音處理超時,請縮短音頻長度或檢查網路速度", timeoutEx);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "❌ 發音評估系統錯誤: FlashcardId={FlashcardId}, ExceptionType={Type}",
            flashcardId, ex.GetType().Name);

        var errorAnalysis = AnalyzeGeneralException(ex);
        _logger.LogError("💡 錯誤分析結果: {Analysis}", errorAnalysis);

        if (ex.InnerException != null)
        {
            _logger.LogError("🔍 內部異常: {InnerExceptionType} - {InnerMessage}",
                ex.InnerException.GetType().Name, ex.InnerException.Message);
        }

        throw new InvalidOperationException($"發音評估失敗: {errorAnalysis}", ex);
    }
}

/// <summary>
/// Writes debug-level diagnostics about the raw audio bytes: size, estimated duration,
/// leading magic bytes and a rough amplitude check over the first samples.
/// </summary>
/// <param name="audioData">The bytes read from the caller's stream.</param>
private void LogAudioDiagnostics(List<byte> audioData)
{
    _logger.LogDebug("🔊 音頻數據分析:");
    _logger.LogDebug(" - 總大小: {TotalSize} bytes", audioData.Count);
    _logger.LogDebug(" - 預估時長: ~{Duration:F1} 秒 (假設 16kHz 16-bit mono)",
        audioData.Count / (16000.0 * 2)); // 16 kHz * 2 bytes per sample

    if (audioData.Count >= 4)
    {
        var header = audioData.Take(4).ToArray();
        var headerHex = string.Join(" ", header.Select(b => b.ToString("X2")));
        _logger.LogDebug(" - 檔案頭部: {Header}", headerHex);

        if (header[0] == 0x52 && header[1] == 0x49 && header[2] == 0x46 && header[3] == 0x46)
        {
            _logger.LogDebug(" - 檢測到 WAV 格式 (RIFF header)");
        }
        else if (header[0] == 0xFF && (header[1] & 0xE0) == 0xE0)
        {
            _logger.LogDebug(" - 檢測到 MP3 格式");
        }
        else
        {
            _logger.LogDebug(" - 未識別的音頻格式,可能是 raw PCM 或其他格式");
        }
    }

    if (audioData.Count > 100)
    {
        // Amplitudes are widened to int: Math.Abs on a short throws for -32768.
        var amplitudes = new List<int>();
        var scanLimit = Math.Min(audioData.Count - 1, 1000);
        for (int i = 0; i < scanLimit; i += 2)
        {
            short sample = (short)(audioData[i] | (audioData[i + 1] << 8));
            amplitudes.Add(Math.Abs((int)sample));
        }

        if (amplitudes.Count > 0)
        {
            var maxAmplitude = amplitudes.Max();
            var avgAmplitude = amplitudes.Average();

            _logger.LogDebug(" - 最大振幅: {Max}", maxAmplitude);
            _logger.LogDebug(" - 平均振幅: {Avg:F1}", avgAmplitude);

            if (maxAmplitude < 100)
            {
                _logger.LogWarning("⚠️ 音頻音量可能過低 (最大振幅: {Max})", maxAmplitude);
            }
            else if (avgAmplitude < 10)
            {
                _logger.LogWarning("⚠️ 音頻平均音量過低,可能包含過多靜音");
            }
        }
    }
}

/// <summary>
/// Logs the recognition outcome and a fixed set of result properties for debugging.
/// </summary>
/// <param name="result">The result returned by the recognizer.</param>
private void LogRecognitionResult(SpeechRecognitionResult result)
{
    _logger.LogInformation("📊 Azure Speech Services 回應: Reason={Reason}, Text='{Text}', Duration={Duration}ms",
        result.Reason.ToString(), result.Text ?? "NULL", result.Duration.TotalMilliseconds);

    _logger.LogDebug("🔍 Azure Speech Result 詳細資訊:");
    _logger.LogDebug(" - ResultId: {ResultId}", result.ResultId ?? "NULL");
    _logger.LogDebug(" - Reason: {Reason} ({ReasonValue})", result.Reason.ToString(), (int)result.Reason);
    _logger.LogDebug(" - Text: '{Text}'", result.Text ?? "NULL");
    _logger.LogDebug(" - Duration: {Duration}ms", result.Duration.TotalMilliseconds);

    if (result.Properties == null)
    {
        return;
    }

    _logger.LogDebug("🏷️ Result Properties:");

    var commonProperties = new[]
    {
        PropertyId.SpeechServiceResponse_JsonResult,
        PropertyId.SpeechServiceResponse_RequestDetailedResultTrueFalse,
        PropertyId.SpeechServiceConnection_Endpoint,
        PropertyId.SpeechServiceConnection_Region
    };

    foreach (var propertyId in commonProperties)
    {
        var value = result.Properties.GetProperty(propertyId);
        if (!string.IsNullOrEmpty(value))
        {
            _logger.LogDebug(" - {PropertyName}: {Value}", propertyId.ToString(), value);
        }
    }
}

/// <summary>
/// Converts a recognized-speech result into the application's <see cref="PronunciationResult"/>,
/// including word-level results and generated feedback.
/// </summary>
/// <param name="result">A result whose reason is <c>RecognizedSpeech</c>.</param>
/// <param name="referenceText">Reference text copied onto the result.</param>
/// <param name="flashcardId">Flashcard identifier copied onto the result.</param>
/// <param name="elapsedMilliseconds">Elapsed processing time reported on the result.</param>
/// <returns>The populated assessment result.</returns>
private PronunciationResult BuildAssessmentResult(
    SpeechRecognitionResult result,
    string referenceText,
    string flashcardId,
    long elapsedMilliseconds)
{
    var pronunciationResult = PronunciationAssessmentResult.FromResult(result);

    // NOTE(review): Overall mirrors AccuracyScore; the SDK result may also expose a combined
    // pronunciation score. Confirm which one is intended.
    var assessmentResult = new PronunciationResult
    {
        AssessmentId = Guid.NewGuid().ToString(),
        FlashcardId = flashcardId,
        ReferenceText = referenceText,
        TranscribedText = result.Text,
        Scores = new PronunciationScores
        {
            Overall = pronunciationResult.AccuracyScore,
            Accuracy = pronunciationResult.AccuracyScore,
            Fluency = pronunciationResult.FluencyScore,
            Completeness = pronunciationResult.CompletenessScore,
            Prosody = pronunciationResult.ProsodyScore
        },
        ProcessingTime = elapsedMilliseconds,
        ConfidenceLevel = MapScoreToConfidence(pronunciationResult.AccuracyScore)
    };

    if (pronunciationResult.Words != null)
    {
        assessmentResult.WordLevelResults = [.. pronunciationResult.Words
            .Select(word => new WordLevelResult
            {
                Word = word.Word,
                AccuracyScore = word.AccuracyScore,
                ErrorType = word.ErrorType.ToString()
            })];
    }

    assessmentResult.Feedback = GenerateFeedback(assessmentResult.Scores, assessmentResult.WordLevelResults);

    _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
        pronunciationResult.AccuracyScore, elapsedMilliseconds);

    return assessmentResult;
}

/// <summary>
/// Logs the cancellation details of a canceled recognition and builds the exception to throw for it.
/// </summary>
/// <param name="result">A result whose reason is <c>Canceled</c>.</param>
/// <returns>The exception describing the cancellation and the suggested remedy.</returns>
private InvalidOperationException BuildCancellationException(SpeechRecognitionResult result)
{
    var cancellation = CancellationDetails.FromResult(result);
    _logger.LogError("❌ Azure Speech Services 處理被取消");
    _logger.LogError("🔍 取消詳細資訊:");
    _logger.LogError(" - Reason: {Reason}", cancellation.Reason);
    _logger.LogError(" - ErrorCode: {ErrorCode}", cancellation.ErrorCode);
    _logger.LogError(" - ErrorDetails: {ErrorDetails}", cancellation.ErrorDetails ?? "NULL");

    var errorAnalysis = AnalyzeAzureErrorCode(cancellation.ErrorCode.ToString());
    _logger.LogError("💡 錯誤分析: {Analysis}", errorAnalysis);

    if (cancellation.Reason == CancellationReason.Error)
    {
        return new InvalidOperationException(
            $"語音識別錯誤: {cancellation.ErrorDetails} (ErrorCode: {cancellation.ErrorCode})\n建議解決方案: {errorAnalysis}");
    }

    return new InvalidOperationException(
        $"語音識別被取消: {cancellation.Reason},請檢查音頻格式或網路連接\n建議解決方案: {errorAnalysis}");
}
|
||
|
||
/// <summary>
/// Reports whether the service looks usable based on local configuration only: a
/// subscription key must be present and a speech configuration must be constructible
/// with a non-empty region. No request is sent to Azure.
/// </summary>
/// <returns>
/// A completed task holding <c>true</c> when the configuration check passes;
/// <c>false</c> when the key is missing or building the configuration throws.
/// </returns>
public Task<bool> IsServiceAvailableAsync()
{
    // Nothing here is awaited, so completed tasks are returned directly rather than
    // marking the method async (which only produced an await-less state machine).
    try
    {
        if (string.IsNullOrEmpty(_options.SubscriptionKey))
        {
            _logger.LogWarning("Azure Speech Services 未配置");
            return Task.FromResult(false);
        }

        var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
        return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
        return Task.FromResult(false);
    }
}
|
||
|
||
/// <summary>
/// Buckets a 0-100 score into a three-level confidence value.
/// </summary>
/// <param name="overallScore">Score to classify.</param>
/// <returns>2 for scores of at least 85, 1 for scores of at least 70, otherwise 0.</returns>
private static int MapScoreToConfidence(double overallScore)
{
    if (overallScore >= 85)
    {
        return 2; // excellent (high confidence)
    }

    if (overallScore >= 70)
    {
        return 1; // good (medium confidence)
    }

    return 0; // needs improvement (low confidence)
}
|
||
|
||
/// <summary>
/// Builds the list of feedback messages for an assessment: one overall verdict, optional
/// hints for low accuracy/fluency/prosody, and up to three words to practise.
/// </summary>
/// <param name="scores">Aggregate scores of the assessment.</param>
/// <param name="wordResults">
/// Word-level results; may be null or empty, in which case no word hint is produced.
/// </param>
/// <returns>The feedback messages in display order.</returns>
private static List<string> GenerateFeedback(PronunciationScores scores, List<WordLevelResult>? wordResults)
{
    var feedback = new List<string>();

    // Overall verdict.
    if (scores.Overall >= 90)
    {
        feedback.Add("🎉 發音表現優秀!");
    }
    else if (scores.Overall >= 80)
    {
        feedback.Add("👍 發音表現良好");
    }
    else if (scores.Overall >= 70)
    {
        feedback.Add("📈 發音有進步空間");
    }
    else
    {
        feedback.Add("💪 建議多加練習發音");
    }

    // Targeted hints for each weak dimension.
    if (scores.Accuracy < 70)
    {
        feedback.Add("注意發音準確度,可以多聽標準發音範例");
    }

    if (scores.Fluency < 70)
    {
        feedback.Add("嘗試讓語速更自然流暢");
    }

    if (scores.Prosody < 70)
    {
        feedback.Add("注意語調和重音的掌握");
    }

    // Word-level hint. The caller only populates word results when the SDK returned
    // words, so a missing list is tolerated instead of dereferenced.
    if (wordResults is { Count: > 0 })
    {
        var wordsToPractise = wordResults
            .Where(w => w.AccuracyScore < 70)
            .Take(3)
            .Select(w => $"'{w.Word}'")
            .ToList();

        if (wordsToPractise.Count > 0)
        {
            feedback.Add($"重點練習: {string.Join("、", wordsToPractise)}");
        }
    }

    return feedback;
}
|
||
|
||
/// <summary>
/// Outcome of validating raw audio bytes.
/// </summary>
public class AudioValidationResult
{
    /// <summary>True when no warning or error was recorded.</summary>
    public bool IsValid { get; set; }

    /// <summary>True when at least one error makes the audio unusable.</summary>
    public bool HasCriticalErrors { get; set; }

    /// <summary>Messages describing critical problems.</summary>
    public List<string> Errors { get; set; } = new();

    /// <summary>Messages describing non-critical problems.</summary>
    public List<string> Warnings { get; set; } = new();
}

/// <summary>
/// Checks the size, leading magic bytes and rough loudness of raw audio bytes.
/// </summary>
/// <param name="audioData">Audio bytes to inspect.</param>
/// <returns>
/// A result whose <c>IsValid</c> is false if any warning or error was recorded, and whose
/// <c>HasCriticalErrors</c> is set for empty, tiny (&lt;100 bytes), oversized (&gt;10 MB)
/// or completely silent data.
/// </returns>
private static AudioValidationResult ValidateAudioData(List<byte> audioData)
{
    var result = new AudioValidationResult { IsValid = true };

    // Size checks.
    if (audioData.Count == 0)
    {
        result.Errors.Add("音頻數據為空");
        result.HasCriticalErrors = true;
        result.IsValid = false;
    }
    else if (audioData.Count < 100)
    {
        result.Errors.Add($"音頻數據過小({audioData.Count} bytes),可能是無效數據");
        result.HasCriticalErrors = true;
        result.IsValid = false;
    }
    else if (audioData.Count < 1000)
    {
        result.Warnings.Add($"音頻數據較小({audioData.Count} bytes),建議錄音時間至少 1 秒");
        result.IsValid = false;
    }

    if (audioData.Count > 10 * 1024 * 1024) // 10 MB
    {
        // Floating-point division so the one-decimal figure is real rather than always ".0".
        result.Errors.Add($"音頻檔案過大({audioData.Count / (1024.0 * 1024.0):F1}MB),請縮短錄音時間");
        result.HasCriticalErrors = true;
        result.IsValid = false;
    }

    // Magic-byte check against the formats this method recognizes.
    if (audioData.Count >= 4)
    {
        byte b0 = audioData[0], b1 = audioData[1], b2 = audioData[2], b3 = audioData[3];

        bool isWav = b0 == 0x52 && b1 == 0x49 && b2 == 0x46 && b3 == 0x46; // "RIFF"
        bool isMp3 = b0 == 0xFF && (b1 & 0xE0) == 0xE0;                    // MPEG frame sync
        bool isOgg = b0 == 0x4F && b1 == 0x67 && b2 == 0x67 && b3 == 0x53; // "OggS"

        if (!(isWav || isMp3 || isOgg))
        {
            result.Warnings.Add("無法識別音頻格式,建議使用 WAV 格式");
            result.IsValid = false;
        }
    }

    // Rough loudness check: the first (up to) 1000 byte pairs are read as little-endian
    // 16-bit samples, whatever the container.
    if (audioData.Count > 100)
    {
        var sampleCount = Math.Min(1000, audioData.Count / 2);
        int maxAmplitude = 0;
        long amplitudeSum = 0;

        for (int s = 0; s < sampleCount; s++)
        {
            short sample = (short)(audioData[2 * s] | (audioData[2 * s + 1] << 8));

            // Widen before taking the absolute value: Math.Abs on a short throws for -32768.
            int amplitude = Math.Abs((int)sample);
            amplitudeSum += amplitude;
            if (amplitude > maxAmplitude)
            {
                maxAmplitude = amplitude;
            }
        }

        double avgAmplitude = (double)amplitudeSum / sampleCount;

        if (maxAmplitude < 100)
        {
            result.Warnings.Add($"音頻音量過低(最大振幅: {maxAmplitude}),可能影響識別準確度");
            result.IsValid = false;
        }

        if (avgAmplitude < 10)
        {
            result.Warnings.Add("音頻包含過多靜音,建議重新錄製");
            result.IsValid = false;
        }

        if (maxAmplitude == 0)
        {
            result.Errors.Add("音頻為完全靜音,請檢查麥克風設定");
            result.HasCriticalErrors = true;
            result.IsValid = false;
        }
    }

    return result;
}
|
||
|
||
/// <summary>
/// Disposes the audio input stream, logging instead of throwing if disposal fails.
/// </summary>
/// <param name="audioInputStream">Stream to dispose; null is ignored.</param>
/// <param name="context">Short description of why cleanup is running, for the log.</param>
private void SafeCleanupResources(AudioInputStream? audioInputStream, string context)
{
    _logger.LogDebug("🧹 開始清理資源 - 上下文: {Context}", context);

    if (audioInputStream is null)
    {
        return;
    }

    try
    {
        audioInputStream.Dispose();
        _logger.LogDebug("✅ AudioInputStream 已安全釋放");
    }
    catch (Exception cleanupEx)
    {
        // Cleanup is best-effort: a failed Dispose must not mask the original error.
        _logger.LogWarning(cleanupEx, "⚠️ AudioInputStream 清理時發生警告");
    }

    // No forced GC.Collect()/WaitForPendingFinalizers() here: a blocking full collection on
    // the request path stalls the thread, and the runtime reclaims the memory on its own.
}
|
||
|
||
/// <summary>
/// Applies the recovery pipeline to a copy of the audio: silence trimming, then volume
/// normalization, then padding to the minimum length.
/// </summary>
/// <param name="originalAudioData">Audio bytes as received; not modified.</param>
/// <returns>The processed audio bytes.</returns>
private static List<byte> AttemptAudioRecovery(List<byte> originalAudioData)
{
    // The helpers receive a copy, never the caller's list.
    var workingCopy = new List<byte>(originalAudioData);

    var trimmed = RemoveSilence(workingCopy);      // step 1: drop leading/trailing silence
    var normalized = NormalizeVolume(trimmed);     // step 2: boost quiet recordings
    return EnsureMinimumLength(normalized);        // step 3: pad very short clips
}
|
||
|
||
/// <summary>
/// Trims leading and trailing silence, reading the bytes as little-endian 16-bit samples
/// and keeping 10 samples of padding on each side of the audible region.
/// </summary>
/// <param name="audioData">Audio bytes to trim.</param>
/// <returns>
/// A new list holding the trimmed samples; or <paramref name="audioData"/> itself when it is
/// shorter than 100 bytes or contains no sample louder than the silence threshold.
/// </returns>
private static List<byte> RemoveSilence(List<byte> audioData)
{
    if (audioData.Count < 100)
    {
        return audioData;
    }

    const int silenceThreshold = 100;
    const int paddingSamples = 10;

    // A trailing odd byte does not form a sample and is ignored.
    int sampleCount = audioData.Count / 2;

    // Widen before Math.Abs: taking the absolute value of a short throws for -32768.
    bool IsAudible(int sampleIndex)
    {
        short sample = (short)(audioData[2 * sampleIndex] | (audioData[2 * sampleIndex + 1] << 8));
        return Math.Abs((int)sample) > silenceThreshold;
    }

    int firstAudible = -1;
    for (int i = 0; i < sampleCount; i++)
    {
        if (IsAudible(i))
        {
            firstAudible = i;
            break;
        }
    }

    // Nothing above the threshold: return the input untouched. The previous guard
    // (start >= end) could never fire, so this documented case was never taken.
    if (firstAudible < 0)
    {
        return audioData;
    }

    // Scan backwards; guaranteed to stop at firstAudible at the latest.
    int lastAudible = sampleCount - 1;
    while (!IsAudible(lastAudible))
    {
        lastAudible--;
    }

    int startIndex = Math.Max(0, firstAudible - paddingSamples);
    int endIndex = Math.Min(sampleCount - 1, lastAudible + paddingSamples);

    // Samples are stored as the original byte pairs, so the byte range can be copied directly.
    return audioData.GetRange(startIndex * 2, (endIndex - startIndex + 1) * 2);
}
|
||
|
||
/// <summary>
/// Amplifies quiet audio, reading the bytes as little-endian 16-bit samples. When the peak
/// amplitude is above 0 and below 1000, every sample is scaled by
/// <c>min(3, 1000 / peak)</c>.
/// </summary>
/// <param name="audioData">Audio bytes to normalize.</param>
/// <returns>
/// A new list with the amplified samples; or <paramref name="audioData"/> itself when it is
/// shorter than 100 bytes, completely silent, or already has a peak of 1000 or more.
/// </returns>
private static List<byte> NormalizeVolume(List<byte> audioData)
{
    if (audioData.Count < 100)
    {
        return audioData;
    }

    // A trailing odd byte does not form a sample and is ignored.
    int sampleCount = audioData.Count / 2;
    var samples = new short[sampleCount];
    int maxAmplitude = 0;

    for (int i = 0; i < sampleCount; i++)
    {
        short sample = (short)(audioData[2 * i] | (audioData[2 * i + 1] << 8));
        samples[i] = sample;

        // Widen before Math.Abs: taking the absolute value of a short throws for -32768.
        maxAmplitude = Math.Max(maxAmplitude, Math.Abs((int)sample));
    }

    if (maxAmplitude <= 0 || maxAmplitude >= 1000)
    {
        return audioData;
    }

    double amplificationFactor = Math.Min(3.0, 1000.0 / maxAmplitude); // at most 3x

    var result = new List<byte>(sampleCount * 2);
    foreach (var sample in samples)
    {
        // Clamp to the 16-bit range before narrowing.
        var amplifiedSample = (short)Math.Max(-32768, Math.Min(32767, sample * amplificationFactor));
        result.Add((byte)(amplifiedSample & 0xFF));
        result.Add((byte)((amplifiedSample >> 8) & 0xFF));
    }

    return result;
}
|
||
|
||
/// <summary>
/// Pads audio shorter than 1000 bytes with trailing zero bytes (silence) up to that length.
/// </summary>
/// <param name="audioData">Audio bytes to pad.</param>
/// <returns>
/// <paramref name="audioData"/> itself when it already has at least 1000 bytes; otherwise a
/// new 1000-byte list starting with the original bytes.
/// </returns>
private static List<byte> EnsureMinimumLength(List<byte> audioData)
{
    const int minimumBytes = 1000; // at least 1 KB

    var shortfall = minimumBytes - audioData.Count;
    if (shortfall <= 0)
    {
        return audioData;
    }

    var padded = new List<byte>(minimumBytes);
    padded.AddRange(audioData);
    padded.AddRange(Enumerable.Repeat((byte)0, shortfall)); // zero bytes == silence
    return padded;
}
|
||
|
||
/// <summary>
/// Checks that the subscription key and region from the options are present and plausible.
/// </summary>
/// <returns>
/// <c>IsValid</c> is true when no error was collected; <c>Errors</c> lists a missing or
/// too-short (&lt;32 characters) key and a missing region. An unfamiliar region only
/// produces a warning in the log.
/// </returns>
private (bool IsValid, List<string> Errors) ValidateAzureConfiguration()
{
    var problems = new List<string>();

    // Subscription key: must be set and at least 32 characters long.
    if (string.IsNullOrWhiteSpace(_options.SubscriptionKey))
    {
        problems.Add("SubscriptionKey 未設定");
    }
    else if (_options.SubscriptionKey.Length < 32)
    {
        problems.Add($"SubscriptionKey 長度異常 (實際: {_options.SubscriptionKey.Length}, 期望: >=32)");
    }

    // Region: must be set; regions outside the short known list are only warned about.
    if (string.IsNullOrWhiteSpace(_options.Region))
    {
        problems.Add("Region 未設定");
    }
    else
    {
        string[] knownRegionFragments = { "eastus", "westus", "eastasia", "southeastasia", "northeurope", "westeurope" };
        var normalizedRegion = _options.Region.ToLowerInvariant();

        var looksStandard = false;
        foreach (var fragment in knownRegionFragments)
        {
            if (normalizedRegion.Contains(fragment))
            {
                looksStandard = true;
                break;
            }
        }

        if (!looksStandard)
        {
            _logger.LogWarning("⚠️ Region '{Region}' 可能不是標準的 Azure Region 格式", _options.Region);
        }
    }

    // Record the configuration state (key length only, never the key itself).
    var keyStatus = string.IsNullOrWhiteSpace(_options.SubscriptionKey)
        ? "未設定"
        : $"已設定 (長度: {_options.SubscriptionKey.Length})";

    _logger.LogDebug("🔧 Azure 配置驗證結果:");
    _logger.LogDebug(" - SubscriptionKey: {Status}", keyStatus);
    _logger.LogDebug(" - Region: {Region}", _options.Region ?? "未設定");

    return (problems.Count == 0, problems);
}
|
||
|
||
/// <summary>
/// Turns an audio-processing exception into a user-facing explanation with a suggested fix.
/// The exact exception type name is checked first, then keywords in the lower-cased message,
/// then the size of the audio data.
/// </summary>
/// <param name="ex">The exception raised while handling the audio.</param>
/// <param name="audioData">Audio bytes read so far; null is treated as zero bytes.</param>
/// <returns>The explanation and suggestion text.</returns>
private static string AnalyzeAudioProcessingError(Exception ex, List<byte>? audioData)
{
    var message = ex.Message.ToLowerInvariant();
    var exceptionType = ex.GetType().Name;
    var audioSize = audioData?.Count ?? 0;

    // 1. Exact type-name match (derived exception types are deliberately not matched).
    switch (exceptionType)
    {
        case "OutOfMemoryException":
            return "音頻檔案過大,超出系統記憶體限制。建議:縮短錄音時間或降低音質";

        case "ArgumentException":
            return message.Contains("audio") || message.Contains("format")
                ? "音頻格式參數錯誤。建議:使用 WAV 格式(16kHz, 16-bit, mono)"
                : "音頻數據參數錯誤。建議:檢查音頻檔案是否完整";

        case "InvalidOperationException":
            return message.Contains("stream") || message.Contains("closed")
                ? "音頻流狀態異常。建議:重新上傳音頻檔案"
                : "音頻處理操作無效。建議:檢查音頻檔案格式和完整性";

        case "IOException":
            return "音頻檔案讀取失敗。建議:檢查檔案是否損壞或被其他程序佔用";

        case "UnauthorizedAccessException":
            return "音頻檔案存取權限不足。建議:檢查檔案權限設定";

        case "NotSupportedException":
            return "音頻格式不被支援。建議:使用 WAV、MP3 或 WebM 格式";
    }

    // 2. Keywords in the message.
    if (message.Contains("format") || message.Contains("encoding"))
    {
        return "音頻編碼格式錯誤。建議:轉換為 WAV 格式(16kHz, 16-bit, mono)";
    }

    if (message.Contains("empty") || message.Contains("null"))
    {
        return "音頻數據為空。建議:重新錄製音頻或檢查上傳過程";
    }

    if (message.Contains("size") || message.Contains("length"))
    {
        if (audioSize == 0)
        {
            return "音頻檔案為空。建議:重新錄製音頻";
        }

        if (audioSize < 1000)
        {
            return $"音頻檔案過小({audioSize} bytes)。建議:延長錄音時間至少 1 秒";
        }

        if (audioSize > 10 * 1024 * 1024)
        {
            // Floating-point division so the one-decimal figure is real rather than always ".0".
            return $"音頻檔案過大({audioSize / (1024.0 * 1024.0):F1}MB)。建議:縮短錄音時間或降低音質";
        }

        // A plausible size falls through to the remaining checks.
    }

    // "time" also covers "timeout".
    if (message.Contains("time"))
    {
        return "音頻處理超時。建議:縮短音頻長度或檢查網路連接";
    }

    // 3. Fall back on the amount of audio data.
    if (audioSize == 0)
    {
        return "音頻處理失敗:無音頻數據。建議:重新錄製音頻並確保麥克風正常工作";
    }

    if (audioSize < 100)
    {
        return $"音頻處理失敗:音頻數據異常小({audioSize} bytes)。建議:檢查錄音設備或重新錄製";
    }

    return $"音頻處理失敗({exceptionType})。建議:使用 WAV 格式重新錄製,確保音頻清晰且時長 1-30 秒";
}
|
||
|
||
/// <summary>
/// Maps an arbitrary exception to a user-facing explanation by looking for known keywords
/// in its lower-cased message. Rules are evaluated in order and the first match wins.
/// </summary>
/// <param name="ex">The exception to classify.</param>
/// <returns>The explanation and suggestion text; a generic one when nothing matches.</returns>
private static string AnalyzeGeneralException(Exception ex)
{
    var text = ex.Message.ToLowerInvariant();

    // True when the message contains at least one of the given fragments.
    bool Mentions(params string[] fragments)
    {
        foreach (var fragment in fragments)
        {
            if (text.Contains(fragment))
            {
                return true;
            }
        }

        return false;
    }

    if (Mentions("error code: 0x5", "unauthorized", "forbidden"))
    {
        return "Azure Speech Services 認證失敗 - 檢查 SubscriptionKey 和 Region 配置";
    }

    if (Mentions("error code: 0x6", "audio format", "unsupported"))
    {
        return "音頻格式不支援 - 使用 WAV 格式(16kHz, 16-bit, mono)";
    }

    if (Mentions("error code: 0x7", "network", "connection"))
    {
        return "網路連接問題 - 檢查網路連接或防火牆設定";
    }

    if (Mentions("timeout", "timed out"))
    {
        return "請求超時 - 縮短音頻長度或檢查網路速度";
    }

    if (Mentions("quota", "limit", "throttle"))
    {
        return "配額超限或請求過於頻繁 - 稍後再試或升級服務方案";
    }

    if (Mentions("region", "endpoint"))
    {
        return "Region 配置錯誤 - 檢查 Azure Region 是否正確";
    }

    if (Mentions("audio") && Mentions("empty", "invalid"))
    {
        return "音頻數據無效 - 重新錄製音頻或檢查音頻格式";
    }

    // Nothing recognized: generic advice.
    return "系統錯誤 - 檢查網路連接、音頻格式和 Azure 配置,如問題持續請聯繫技術支援";
}
|
||
|
||
/// <summary>
/// Maps an Azure Speech error code to an explanation with a suggested fix.
/// </summary>
/// <param name="errorCode">
/// Error code text. The caller passes the name of the SDK's cancellation error code; HTTP-style
/// names and hexadecimal codes are also recognized.
/// </param>
/// <returns>The explanation text; a generic one that echoes the code when it is not recognized.</returns>
private static string AnalyzeAzureErrorCode(string errorCode)
{
    return errorCode switch
    {
        "BadRequest" => "請求格式不正確 - 檢查音頻格式是否為支援的格式(WAV、WebM、MP3)",

        // "AuthenticationFailure" is the SDK enum name; "Unauthorized" is the HTTP-style name.
        "Unauthorized" or "AuthenticationFailure" => "認證失敗 - 檢查 Azure Speech Services API Key 是否正確配置",
        "Forbidden" => "權限不足 - 檢查 Azure 訂閱是否啟用 Speech Services",
        "NotFound" => "找不到資源 - 檢查 Azure Region 是否正確",
        "TooManyRequests" => "請求過於頻繁 - 稍後再試或升級服務方案",

        // "ServiceError" is the SDK enum name for a server-side failure.
        "InternalServerError" or "ServiceError" => "Azure 服務內部錯誤 - 稍後再試",
        "ServiceUnavailable" => "服務暫時不可用 - 檢查網路連接或稍後再試",
        "0x5" => "認證錯誤 - 檢查 SubscriptionKey 和 Region 配置",
        "0x6" => "音頻格式不支援 - 使用 WAV 格式(16kHz, 16-bit, mono)",

        // "ConnectionFailure" is the SDK enum name for a network-level failure.
        "0x7" or "ConnectionFailure" => "網路連接問題 - 檢查網路連接或防火牆設定",
        "0x8" => "音頻數據損壞 - 重新錄製音頻",

        // "ServiceTimeout" is the SDK enum name for a timed-out request.
        "0x9" or "ServiceTimeout" => "超時錯誤 - 縮短音頻長度或檢查網路速度",
        "0xa" => "配額超限 - 檢查 Azure 服務使用量",
        _ => $"未知錯誤碼 '{errorCode}' - 檢查網路連接、音頻格式和 Azure 配置"
    };
}
|
||
} |