using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
using Microsoft.Extensions.Options;
using System.Diagnostics;
using DramaLing.Api.Models.Configuration;
using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Contracts.Services.Speech;

namespace DramaLing.Api.Services.Speech;

/// <summary>
/// <see cref="IPronunciationAssessmentService"/> implementation that scores a spoken
/// recording against a reference text using the Azure Speech SDK.
/// </summary>
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
{
    private readonly AzureSpeechOptions _options;
    private readonly ILogger<AzurePronunciationAssessmentService> _logger;

    /// <summary>
    /// Creates the service from the configured Azure Speech options and a logger.
    /// </summary>
    /// <param name="options">Options wrapper providing the subscription key and region.</param>
    /// <param name="logger">Logger used for progress and error reporting.</param>
    public AzurePronunciationAssessmentService(
        IOptions<AzureSpeechOptions> options,
        ILogger<AzurePronunciationAssessmentService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs a single-utterance recognition with pronunciation assessment over the supplied audio
    /// and converts the SDK result into a <see cref="PronunciationResult"/>.
    /// </summary>
    /// <param name="audioStream">
    /// Audio to assess. The bytes are forwarded unchanged to a push stream declared as
    /// 16 kHz / 16-bit / mono PCM.
    /// </param>
    /// <param name="referenceText">Text the speaker was expected to read.</param>
    /// <param name="flashcardId">Identifier copied into the result and the log entries.</param>
    /// <param name="language">Recognition language; defaults to <c>en-US</c>.</param>
    /// <returns>The assessment scores, word-level results and generated feedback.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioStream"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The recognizer reported anything other than recognized speech (no match, canceled, ...).
    /// </exception>
    public async Task<PronunciationResult> EvaluatePronunciationAsync(
        Stream audioStream,
        string referenceText,
        string flashcardId,
        string language = "en-US")
    {
        ArgumentNullException.ThrowIfNull(audioStream);

        var stopwatch = Stopwatch.StartNew();

        try
        {
            _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}",
                flashcardId, language);

            // 1. Configure the Azure Speech client.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            speechConfig.SpeechRecognitionLanguage = language;

            // 2. Configure the pronunciation assessment.
            var pronunciationConfig = new PronunciationAssessmentConfig(
                referenceText,
                GradingSystem.HundredMark,
                Granularity.Word,
                enableMiscue: true);

            // Prosody has to be requested explicitly; without this the ProsodyScore read
            // below is not populated and the "< 70" prosody feedback would always fire.
            // NOTE(review): assumes the service ignores this for locales without prosody support - confirm.
            pronunciationConfig.EnableProsodyAssessment();

            // 3. Create the audio input stream and copy the caller's audio into it.
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            using var pushStream = AudioInputStream.CreatePushStream(audioFormat);

            var buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = await audioStream.ReadAsync(buffer.AsMemory())) > 0)
            {
                pushStream.Write(buffer, bytesRead);
            }

            // Closing marks the end of the audio for the recognizer.
            pushStream.Close();

            // 4. Audio configuration.
            using var audioConfig = AudioConfig.FromStreamInput(pushStream);

            // 5. Create the recognizer and attach the assessment configuration.
            using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            pronunciationConfig.ApplyTo(recognizer);

            // 6. Run recognition together with the pronunciation assessment.
            var result = await recognizer.RecognizeOnceAsync();

            stopwatch.Stop();

            // 7. Inspect the outcome; guard clauses handle every non-success reason.
            if (result.Reason == ResultReason.NoMatch)
            {
                throw new InvalidOperationException("未檢測到語音,請確保音頻清晰並重新錄製");
            }

            if (result.Reason != ResultReason.RecognizedSpeech)
            {
                if (result.Reason == ResultReason.Canceled)
                {
                    // The reason name alone hides the cause (bad key, network, quota, ...).
                    var cancellation = CancellationDetails.FromResult(result);
                    _logger.LogWarning("語音識別已取消: ErrorCode={ErrorCode}, Details={ErrorDetails}",
                        cancellation.ErrorCode, cancellation.ErrorDetails);
                }

                throw new InvalidOperationException($"語音識別失敗: {result.Reason}");
            }

            var pronunciationResult = PronunciationAssessmentResult.FromResult(result);

            // The SDK's aggregate score is the overall value; AccuracyScore is only one component.
            var overallScore = pronunciationResult.PronunciationScore;

            // 8. Convert to the application's result model.
            var assessmentResult = new PronunciationResult
            {
                AssessmentId = Guid.NewGuid().ToString(),
                FlashcardId = flashcardId,
                ReferenceText = referenceText,
                TranscribedText = result.Text,
                Scores = new PronunciationScores
                {
                    Overall = overallScore,
                    Accuracy = pronunciationResult.AccuracyScore,
                    Fluency = pronunciationResult.FluencyScore,
                    Completeness = pronunciationResult.CompletenessScore,
                    Prosody = pronunciationResult.ProsodyScore
                },
                ProcessingTime = stopwatch.ElapsedMilliseconds,
                ConfidenceLevel = MapScoreToConfidence(overallScore)
            };

            // 9. Word-level results (left at the model's default when the SDK returns none).
            if (pronunciationResult.Words != null)
            {
                assessmentResult.WordLevelResults = pronunciationResult.Words
                    .Select(word => new WordLevelResult
                    {
                        Word = word.Word,
                        AccuracyScore = word.AccuracyScore,
                        ErrorType = word.ErrorType.ToString()
                    })
                    .ToList();
            }

            // 10. Generate feedback suggestions.
            assessmentResult.Feedback = GenerateFeedback(
                assessmentResult.Scores,
                assessmentResult.WordLevelResults);

            _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
                overallScore, stopwatch.ElapsedMilliseconds);

            return assessmentResult;
        }
        catch (Exception ex)
        {
            // Log with the flashcard context, then rethrow preserving the stack trace.
            _logger.LogError(ex, "發音評估錯誤: FlashcardId={FlashcardId}", flashcardId);
            throw;
        }
    }

    /// <summary>
    /// Checks whether the Azure Speech configuration looks usable: a subscription key is present
    /// and a <see cref="SpeechConfig"/> with a non-empty region can be built from the options.
    /// This method itself performs no recognition request.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the configuration can be constructed; <c>false</c> when the key is
    /// missing or constructing the configuration throws.
    /// </returns>
    public Task<bool> IsServiceAvailableAsync()
    {
        try
        {
            if (string.IsNullOrEmpty(_options.SubscriptionKey))
            {
                _logger.LogWarning("Azure Speech Services 未配置");
                return Task.FromResult(false);
            }

            // Simple availability check: can a config be created from the stored settings?
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
            return Task.FromResult(false);
        }
    }

    /// <summary>
    /// Maps an overall score on the 0-100 scale to a three-step confidence level.
    /// </summary>
    /// <param name="overallScore">Overall pronunciation score.</param>
    /// <returns>2 for scores of 85 and above, 1 for 70 and above, otherwise 0.</returns>
    private static int MapScoreToConfidence(double overallScore)
    {
        return overallScore switch
        {
            >= 85 => 2, // excellent (high confidence)
            >= 70 => 1, // good (medium confidence)
            _ => 0      // needs improvement (low confidence)
        };
    }

    /// <summary>
    /// Builds the user-facing feedback messages from the aggregate scores and the words
    /// whose accuracy fell below 70.
    /// </summary>
    /// <param name="scores">Aggregate scores of the assessment.</param>
    /// <param name="wordResults">Word-level results; may be null when none were produced.</param>
    /// <returns>
    /// One overall verdict, followed by targeted advice and up to three words to practice.
    /// </returns>
    private static List<string> GenerateFeedback(PronunciationScores scores, List<WordLevelResult>? wordResults)
    {
        var feedback = new List<string>();

        // Overall verdict.
        if (scores.Overall >= 90)
        {
            feedback.Add("🎉 發音表現優秀!");
        }
        else if (scores.Overall >= 80)
        {
            feedback.Add("👍 發音表現良好");
        }
        else if (scores.Overall >= 70)
        {
            feedback.Add("📈 發音有進步空間");
        }
        else
        {
            feedback.Add("💪 建議多加練習發音");
        }

        // Targeted advice per score dimension.
        if (scores.Accuracy < 70)
        {
            feedback.Add("注意發音準確度,可以多聽標準發音範例");
        }

        if (scores.Fluency < 70)
        {
            feedback.Add("嘗試讓語速更自然流暢");
        }

        if (scores.Prosody < 70)
        {
            feedback.Add("注意語調和重音的掌握");
        }

        // Word-level advice; a missing word list simply yields no suggestion.
        var problemWords = (wordResults ?? Enumerable.Empty<WordLevelResult>())
            .Where(w => w.AccuracyScore < 70)
            .ToList();

        if (problemWords.Count > 0)
        {
            // Only the first three problem words are listed.
            var wordList = string.Join("、", problemWords.Take(3).Select(w => $"'{w.Word}'"));
            feedback.Add($"重點練習: {wordList}");
        }

        return feedback;
    }
}