199 lines
7.5 KiB
C#
199 lines
7.5 KiB
C#
using Microsoft.CognitiveServices.Speech;
|
|
using Microsoft.CognitiveServices.Speech.Audio;
|
|
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
|
|
using Microsoft.Extensions.Options;
|
|
using System.Diagnostics;
|
|
using DramaLing.Api.Models.Configuration;
|
|
using DramaLing.Api.Models.DTOs;
|
|
using DramaLing.Api.Contracts.Services.Speech;
|
|
|
|
namespace DramaLing.Api.Services.Speech;
|
|
|
|
/// <summary>
/// <see cref="IPronunciationAssessmentService"/> implementation that scores a spoken
/// recording against a reference text using Azure Speech Services pronunciation assessment.
/// </summary>
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
{
    // Size of the chunks copied from the caller's stream into the SDK push stream.
    private const int AudioCopyBufferSize = 4096;

    private readonly AzureSpeechOptions _options;
    private readonly ILogger<AzurePronunciationAssessmentService> _logger;

    /// <summary>
    /// Creates the service from the configured Azure Speech options and a logger.
    /// </summary>
    /// <param name="options">Options wrapper; its <c>Value</c> supplies the subscription key and region.</param>
    /// <param name="logger">Logger used for progress and error reporting.</param>
    public AzurePronunciationAssessmentService(
        IOptions<AzureSpeechOptions> options,
        ILogger<AzurePronunciationAssessmentService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs a single-shot recognition with pronunciation assessment over the supplied audio
    /// and maps the SDK result to a <see cref="PronunciationResult"/>.
    /// </summary>
    /// <param name="audioStream">
    /// Audio to assess. The bytes are forwarded unmodified into a push stream declared as
    /// 16 kHz, 16-bit, mono PCM.
    /// NOTE(review): if callers upload WAV files, the RIFF header is pushed as audio too — confirm
    /// whether the header should be stripped upstream.
    /// </param>
    /// <param name="referenceText">Text the speaker was expected to read.</param>
    /// <param name="flashcardId">Identifier copied into the result and used for log correlation.</param>
    /// <param name="language">Recognition locale passed to the speech configuration.</param>
    /// <returns>The mapped assessment, including scores, word-level results and feedback.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="audioStream"/> or <paramref name="referenceText"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The recognizer reported <c>NoMatch</c> or any reason other than <c>RecognizedSpeech</c>.
    /// </exception>
    public async Task<PronunciationResult> EvaluatePronunciationAsync(
        Stream audioStream,
        string referenceText,
        string flashcardId,
        string language = "en-US")
    {
        ArgumentNullException.ThrowIfNull(audioStream);
        ArgumentNullException.ThrowIfNull(referenceText);

        var stopwatch = Stopwatch.StartNew();

        try
        {
            _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}", flashcardId, language);

            // 1. Configure Azure Speech.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            speechConfig.SpeechRecognitionLanguage = language;

            // 2. Configure the pronunciation assessment.
            var pronunciationConfig = new PronunciationAssessmentConfig(
                referenceText,
                GradingSystem.HundredMark,
                Granularity.Word,
                enableMiscue: true);

            // The prosody score is read below, so it has to be requested explicitly;
            // otherwise the feedback would be based on a score that was never computed.
            pronunciationConfig.EnableProsodyAssessment();

            // 3. Create the audio input stream. Both the format and the push stream own
            //    native resources, so they are disposed when the method exits.
            using var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            using var audioInputStream = AudioInputStream.CreatePushStream(audioFormat);
            await PushAudioAsync(audioStream, audioInputStream);

            // 4. Wrap the push stream in an audio configuration.
            using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);

            // 5. Create the recognizer and attach the assessment configuration.
            using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            pronunciationConfig.ApplyTo(recognizer);

            // 6. Run recognition together with the pronunciation assessment.
            var result = await recognizer.RecognizeOnceAsync();
            stopwatch.Stop();

            // 7. Inspect the outcome.
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                var assessmentResult = BuildAssessmentResult(
                    result,
                    referenceText,
                    flashcardId,
                    stopwatch.ElapsedMilliseconds);

                _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
                    assessmentResult.Scores.Overall, stopwatch.ElapsedMilliseconds);

                return assessmentResult;
            }

            if (result.Reason == ResultReason.NoMatch)
            {
                throw new InvalidOperationException("未檢測到語音,請確保音頻清晰並重新錄製");
            }

            if (result.Reason == ResultReason.Canceled)
            {
                // Surface the service-side reason (authentication, quota, network, ...)
                // that would otherwise be lost behind the generic failure message.
                var cancellation = CancellationDetails.FromResult(result);
                _logger.LogWarning(
                    "語音識別已取消: Reason={Reason}, ErrorCode={ErrorCode}, ErrorDetails={ErrorDetails}",
                    cancellation.Reason, cancellation.ErrorCode, cancellation.ErrorDetails);
            }

            throw new InvalidOperationException($"語音識別失敗: {result.Reason}");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "發音評估錯誤: FlashcardId={FlashcardId}", flashcardId);
            throw;
        }
    }

    /// <summary>
    /// Reports whether the service is configured well enough to build a speech configuration.
    /// No network call is made; this only validates the local settings.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a subscription key is present and the SDK accepts the key/region pair;
    /// <c>false</c> when the key is missing or building the configuration throws.
    /// </returns>
    public Task<bool> IsServiceAvailableAsync()
    {
        try
        {
            if (string.IsNullOrEmpty(_options.SubscriptionKey))
            {
                _logger.LogWarning("Azure Speech Services 未配置");
                return Task.FromResult(false);
            }

            // Simple availability check: the configuration can be built and carries a region.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
            return Task.FromResult(false);
        }
    }

    /// <summary>
    /// Copies every byte of <paramref name="source"/> into the SDK push stream and then
    /// closes the push stream to signal end of audio.
    /// </summary>
    private static async Task PushAudioAsync(Stream source, PushAudioInputStream target)
    {
        var chunk = new byte[AudioCopyBufferSize];
        int bytesRead;
        while ((bytesRead = await source.ReadAsync(chunk.AsMemory())) > 0)
        {
            target.Write(chunk, bytesRead);
        }

        target.Close();
    }

    /// <summary>
    /// Converts a successful recognition into the application's result model,
    /// including word-level results and generated feedback.
    /// </summary>
    private static PronunciationResult BuildAssessmentResult(
        SpeechRecognitionResult recognition,
        string referenceText,
        string flashcardId,
        long elapsedMilliseconds)
    {
        var assessment = PronunciationAssessmentResult.FromResult(recognition);

        // 8. Map to the application's format. "Overall" uses the SDK's aggregate
        //    pronunciation score rather than repeating the accuracy score.
        var mapped = new PronunciationResult
        {
            AssessmentId = Guid.NewGuid().ToString(),
            FlashcardId = flashcardId,
            ReferenceText = referenceText,
            TranscribedText = recognition.Text,
            Scores = new PronunciationScores
            {
                Overall = assessment.PronunciationScore,
                Accuracy = assessment.AccuracyScore,
                Fluency = assessment.FluencyScore,
                Completeness = assessment.CompletenessScore,
                Prosody = assessment.ProsodyScore
            },
            ProcessingTime = elapsedMilliseconds,
            ConfidenceLevel = MapScoreToConfidence(assessment.PronunciationScore)
        };

        // 9. Word-level results.
        if (assessment.Words != null)
        {
            mapped.WordLevelResults = assessment.Words
                .Select(word => new WordLevelResult
                {
                    Word = word.Word,
                    AccuracyScore = word.AccuracyScore,
                    ErrorType = word.ErrorType.ToString()
                })
                .ToList();
        }

        // 10. Feedback suggestions.
        mapped.Feedback = GenerateFeedback(mapped.Scores, mapped.WordLevelResults);

        return mapped;
    }

    /// <summary>
    /// Buckets an overall score into a confidence level:
    /// 2 for scores of 85 and above, 1 for 70 and above, otherwise 0.
    /// </summary>
    private static int MapScoreToConfidence(double overallScore)
    {
        return overallScore switch
        {
            >= 85 => 2, // excellent (high confidence)
            >= 70 => 1, // good (medium confidence)
            _ => 0      // needs improvement (low confidence)
        };
    }

    /// <summary>
    /// Builds the list of feedback messages: one overall verdict, one hint per
    /// sub-score below 70, and up to three words whose accuracy is below 70.
    /// </summary>
    /// <param name="scores">Aggregate scores of the assessment.</param>
    /// <param name="wordResults">Word-level results; may be <c>null</c> when none were produced.</param>
    private static List<string> GenerateFeedback(PronunciationScores scores, List<WordLevelResult> wordResults)
    {
        var feedback = new List<string>();

        // Overall verdict.
        if (scores.Overall >= 90)
        {
            feedback.Add("🎉 發音表現優秀!");
        }
        else if (scores.Overall >= 80)
        {
            feedback.Add("👍 發音表現良好");
        }
        else if (scores.Overall >= 70)
        {
            feedback.Add("📈 發音有進步空間");
        }
        else
        {
            feedback.Add("💪 建議多加練習發音");
        }

        // Specific hints.
        if (scores.Accuracy < 70)
        {
            feedback.Add("注意發音準確度,可以多聽標準發音範例");
        }

        if (scores.Fluency < 70)
        {
            feedback.Add("嘗試讓語速更自然流暢");
        }

        if (scores.Prosody < 70)
        {
            feedback.Add("注意語調和重音的掌握");
        }

        // Word-level hints. The list can be absent when the SDK returned no words,
        // so it is checked before being enumerated.
        if (wordResults != null)
        {
            var problemWords = wordResults
                .Where(w => w.AccuracyScore < 70)
                .Take(3)
                .Select(w => $"'{w.Word}'")
                .ToList();

            if (problemWords.Count > 0)
            {
                var wordList = string.Join("、", problemWords);
                feedback.Add($"重點練習: {wordList}");
            }
        }

        return feedback;
    }
}