// dramaling-vocab-learning/backend/DramaLing.Api/Services/Speech/AzurePronunciationAssessmen...
// 199 lines, 7.5 KiB, C#

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
using Microsoft.Extensions.Options;
using System.Diagnostics;
using DramaLing.Api.Models.Configuration;
using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Contracts.Services.Speech;
namespace DramaLing.Api.Services.Speech;
/// <summary>
/// <see cref="IPronunciationAssessmentService"/> implementation that scores a learner's
/// recording against a reference text using the Azure Speech SDK pronunciation assessment.
/// </summary>
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
{
    // Chunk size used when copying the caller's stream into the SDK push stream.
    private const int CopyBufferSize = 4096;

    // Audio format handed to the SDK: 16 kHz, 16-bit, mono PCM.
    private const uint SamplesPerSecond = 16000;
    private const byte BitsPerSample = 16;
    private const byte ChannelCount = 1;

    // Any score below this value produces a targeted feedback hint.
    private const double WeakScoreThreshold = 70;

    // Upper bound on how many weak words are listed in the feedback.
    private const int MaxProblemWordsInFeedback = 3;

    private readonly AzureSpeechOptions _options;
    private readonly ILogger<AzurePronunciationAssessmentService> _logger;

    /// <summary>
    /// Creates the service from the configured Azure Speech options and a logger.
    /// </summary>
    /// <param name="options">Options wrapper; its <c>Value</c> supplies the subscription key and region.</param>
    /// <param name="logger">Logger used for progress and error messages.</param>
    public AzurePronunciationAssessmentService(
        IOptions<AzureSpeechOptions> options,
        ILogger<AzurePronunciationAssessmentService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs a single-utterance recognition with pronunciation assessment over the supplied audio
    /// and converts the SDK result into a <see cref="PronunciationResult"/>.
    /// </summary>
    /// <param name="audioStream">
    /// Audio to assess. The bytes are pushed to the SDK unchanged and declared as 16 kHz, 16-bit,
    /// mono PCM. NOTE(review): no container header is stripped here - confirm callers supply
    /// audio in that format.
    /// </param>
    /// <param name="referenceText">Text the speaker was expected to read.</param>
    /// <param name="flashcardId">Identifier copied into the result and used in log messages.</param>
    /// <param name="language">Recognition language passed to the SDK; defaults to <c>en-US</c>.</param>
    /// <returns>The assessment scores, word-level results and generated feedback.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioStream"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// No speech was detected, or recognition ended with any reason other than recognized speech.
    /// </exception>
    public async Task<PronunciationResult> EvaluatePronunciationAsync(
        Stream audioStream,
        string referenceText,
        string flashcardId,
        string language = "en-US")
    {
        ArgumentNullException.ThrowIfNull(audioStream);

        var stopwatch = Stopwatch.StartNew();
        try
        {
            _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}", flashcardId, language);

            // 1. Configure the Azure Speech connection.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            speechConfig.SpeechRecognitionLanguage = language;

            // 2. Configure the pronunciation assessment.
            var pronunciationConfig = new PronunciationAssessmentConfig(
                referenceText,
                GradingSystem.HundredMark,
                Granularity.Word,
                enableMiscue: true);

            // The result mapping reads ProsodyScore, so prosody assessment has to be switched on.
            pronunciationConfig.EnableProsodyAssessment();

            // 3. Copy the caller's stream into an SDK push stream. The push stream owns a native
            //    handle, so it is disposed when this method exits.
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(SamplesPerSecond, BitsPerSample, ChannelCount);
            using var audioInputStream = AudioInputStream.CreatePushStream(audioFormat);

            var buffer = new byte[CopyBufferSize];
            int bytesRead;
            while ((bytesRead = await audioStream.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                audioInputStream.Write(buffer, bytesRead);
            }

            // Closing the push stream signals end-of-audio to the recognizer.
            audioInputStream.Close();

            // 4-5. Build the audio configuration and the recognizer.
            using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
            using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            pronunciationConfig.ApplyTo(recognizer);

            // 6. Recognize one utterance and assess it.
            var result = await recognizer.RecognizeOnceAsync();
            stopwatch.Stop();

            // 7. Reject everything that is not a successful recognition.
            if (result.Reason == ResultReason.NoMatch)
            {
                throw new InvalidOperationException("未檢測到語音,請確保音頻清晰並重新錄製");
            }

            if (result.Reason != ResultReason.RecognizedSpeech)
            {
                if (result.Reason == ResultReason.Canceled)
                {
                    // The thrown message only carries the reason; keep the SDK's error details in the log.
                    var cancellation = CancellationDetails.FromResult(result);
                    _logger.LogWarning(
                        "語音識別已取消: Reason={Reason}, ErrorCode={ErrorCode}, ErrorDetails={ErrorDetails}",
                        cancellation.Reason, cancellation.ErrorCode, cancellation.ErrorDetails);
                }

                throw new InvalidOperationException($"語音識別失敗: {result.Reason}");
            }

            // 8-10. Convert to the application's result type and attach feedback.
            var assessmentResult = BuildAssessmentResult(
                result, referenceText, flashcardId, stopwatch.ElapsedMilliseconds);

            _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
                assessmentResult.Scores.Overall, stopwatch.ElapsedMilliseconds);

            return assessmentResult;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "發音評估錯誤: FlashcardId={FlashcardId}", flashcardId);
            throw;
        }
    }

    /// <summary>
    /// Reports whether the service is configured well enough to be used. The check only verifies
    /// that a subscription key is present and that a <see cref="SpeechConfig"/> with a non-empty
    /// region can be built; no recognition request is issued here.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the configuration looks usable; <c>false</c> when the key is missing,
    /// the region is empty, or building the configuration throws.
    /// </returns>
    public Task<bool> IsServiceAvailableAsync()
    {
        try
        {
            if (string.IsNullOrEmpty(_options.SubscriptionKey))
            {
                _logger.LogWarning("Azure Speech Services 未配置");
                return Task.FromResult(false);
            }

            // Simple availability check: the configuration can be constructed and has a region.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
            return Task.FromResult(false);
        }
    }

    /// <summary>
    /// Maps a successful recognition result to a <see cref="PronunciationResult"/>, including
    /// word-level results (when the SDK provides them) and feedback messages.
    /// </summary>
    private static PronunciationResult BuildAssessmentResult(
        SpeechRecognitionResult recognitionResult,
        string referenceText,
        string flashcardId,
        long elapsedMilliseconds)
    {
        var assessment = PronunciationAssessmentResult.FromResult(recognitionResult);

        var mapped = new PronunciationResult
        {
            AssessmentId = Guid.NewGuid().ToString(),
            FlashcardId = flashcardId,
            ReferenceText = referenceText,
            TranscribedText = recognitionResult.Text,
            Scores = new PronunciationScores
            {
                // PronunciationScore is the SDK's aggregate score; AccuracyScore is only one component.
                Overall = assessment.PronunciationScore,
                Accuracy = assessment.AccuracyScore,
                Fluency = assessment.FluencyScore,
                Completeness = assessment.CompletenessScore,
                Prosody = assessment.ProsodyScore
            },
            ProcessingTime = elapsedMilliseconds,
            ConfidenceLevel = MapScoreToConfidence(assessment.PronunciationScore)
        };

        // Word-level results are optional; leave the DTO's default in place when absent.
        if (assessment.Words != null)
        {
            mapped.WordLevelResults = assessment.Words
                .Select(word => new WordLevelResult
                {
                    Word = word.Word,
                    AccuracyScore = word.AccuracyScore,
                    ErrorType = word.ErrorType.ToString()
                })
                .ToList();
        }

        mapped.Feedback = GenerateFeedback(mapped.Scores, mapped.WordLevelResults);
        return mapped;
    }

    /// <summary>
    /// Converts an overall score into a three-step confidence level:
    /// 2 for 85 and above, 1 for 70 and above, otherwise 0.
    /// </summary>
    private static int MapScoreToConfidence(double overallScore)
    {
        return overallScore switch
        {
            >= 85 => 2, // excellent (high confidence)
            >= 70 => 1, // good (medium confidence)
            _ => 0      // needs improvement (low confidence)
        };
    }

    /// <summary>
    /// Builds the list of feedback messages: one overall verdict, then a hint for each of
    /// accuracy, fluency and prosody that falls below the weak-score threshold, then up to
    /// three words whose accuracy is below that threshold.
    /// </summary>
    /// <param name="scores">Aggregate scores of the assessment.</param>
    /// <param name="wordResults">Word-level results; a null list is treated as empty.</param>
    private static List<string> GenerateFeedback(PronunciationScores scores, List<WordLevelResult> wordResults)
    {
        var feedback = new List<string>();

        // Overall verdict.
        if (scores.Overall >= 90)
        {
            feedback.Add("🎉 發音表現優秀!");
        }
        else if (scores.Overall >= 80)
        {
            feedback.Add("👍 發音表現良好");
        }
        else if (scores.Overall >= 70)
        {
            feedback.Add("📈 發音有進步空間");
        }
        else
        {
            feedback.Add("💪 建議多加練習發音");
        }

        // Targeted hints per weak dimension.
        if (scores.Accuracy < WeakScoreThreshold)
        {
            feedback.Add("注意發音準確度,可以多聽標準發音範例");
        }

        if (scores.Fluency < WeakScoreThreshold)
        {
            feedback.Add("嘗試讓語速更自然流暢");
        }

        if (scores.Prosody < WeakScoreThreshold)
        {
            feedback.Add("注意語調和重音的掌握");
        }

        // Word-level hint. The list may be null when the SDK returned no word details.
        var problemWords = (wordResults ?? new List<WordLevelResult>())
            .Where(w => w.AccuracyScore < WeakScoreThreshold)
            .Take(MaxProblemWordsInFeedback)
            .Select(w => $"'{w.Word}'")
            .ToList();

        if (problemWords.Count > 0)
        {
            feedback.Add($"重點練習: {string.Join("、", problemWords)}");
        }

        return feedback;
    }
}