dramaling-vocab-learning/backend/DramaLing.Api/Services/Speech/AzurePronunciationAssessmen...

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.PronunciationAssessment;
using Microsoft.Extensions.Options;
using System.Diagnostics;
using DramaLing.Api.Models.Configuration;
using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Contracts.Services.Speech;

namespace DramaLing.Api.Services.Speech;

/// <summary>
/// <see cref="IPronunciationAssessmentService"/> implementation that scores a learner's
/// recording against a reference text using the Azure Speech SDK pronunciation assessment.
/// </summary>
public class AzurePronunciationAssessmentService : IPronunciationAssessmentService
{
    /// <summary>
    /// Locale for which prosody assessment is requested.
    /// NOTE(review): Azure documents prosody assessment as en-US only at the time of writing;
    /// confirm against the current language-support table before widening this.
    /// </summary>
    private const string ProsodyLocale = "en-US";

    /// <summary>Scores below this value trigger a targeted improvement hint.</summary>
    private const double NeedsWorkThreshold = 70;

    /// <summary>Size of the scratch buffer used when pushing audio into the SDK stream.</summary>
    private const int CopyBufferSize = 4096;

    private readonly AzureSpeechOptions _options;
    private readonly ILogger<AzurePronunciationAssessmentService> _logger;

    /// <summary>
    /// Creates the service from the bound Azure Speech options and a logger.
    /// </summary>
    /// <param name="options">Options providing the subscription key and region.</param>
    /// <param name="logger">Logger used for progress and error reporting.</param>
    public AzurePronunciationAssessmentService(
        IOptions<AzureSpeechOptions> options,
        ILogger<AzurePronunciationAssessmentService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs a single-utterance recognition with pronunciation assessment over
    /// <paramref name="audioStream"/> and converts the SDK result into a <see cref="PronunciationResult"/>.
    /// </summary>
    /// <param name="audioStream">
    /// Audio to assess. It is pushed to the SDK as 16 kHz, 16-bit, mono PCM.
    /// NOTE(review): the bytes are forwarded verbatim, so a WAV container header (if the
    /// caller sends one) is pushed as audio samples too — confirm what callers provide.
    /// </param>
    /// <param name="referenceText">Text the speaker was expected to read.</param>
    /// <param name="flashcardId">Identifier copied into the result and the log entries.</param>
    /// <param name="language">Recognition locale; defaults to <c>en-US</c>.</param>
    /// <returns>The assessment with overall, per-dimension and per-word scores plus feedback.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="audioStream"/> or <paramref name="referenceText"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// No speech was matched, or recognition ended with any reason other than recognized speech.
    /// </exception>
    public async Task<PronunciationResult> EvaluatePronunciationAsync(
        Stream audioStream,
        string referenceText,
        string flashcardId,
        string language = "en-US")
    {
        ArgumentNullException.ThrowIfNull(audioStream);
        ArgumentNullException.ThrowIfNull(referenceText);

        var stopwatch = Stopwatch.StartNew();

        try
        {
            _logger.LogInformation("開始發音評估: FlashcardId={FlashcardId}, Language={Language}", flashcardId, language);

            // 1. Configure the Azure Speech client.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            speechConfig.SpeechRecognitionLanguage = language;

            // 2. Configure pronunciation assessment (0-100 scale, word granularity, miscue detection).
            var pronunciationConfig = new PronunciationAssessmentConfig(
                referenceText,
                GradingSystem.HundredMark,
                Granularity.Word,
                enableMiscue: true
            );

            // Prosody has to be requested explicitly; without this the prosody score read
            // below carries no real assessment.
            var prosodyAssessed = string.Equals(language, ProsodyLocale, StringComparison.OrdinalIgnoreCase);
            if (prosodyAssessed)
            {
                pronunciationConfig.EnableProsodyAssessment();
            }

            // 3. Build the SDK input stream. Format and stream are SDK disposables, so they
            //    are released together with the recognizer when this scope exits.
            using var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            using var audioInputStream = AudioInputStream.CreatePushStream(audioFormat);

            await CopyAudioAsync(audioStream, audioInputStream);

            // Closing the push stream signals end-of-audio to the recognizer.
            audioInputStream.Close();

            // 4. Wrap the stream in an audio config.
            using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);

            // 5. Create the recognizer and attach the assessment settings.
            using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            pronunciationConfig.ApplyTo(recognizer);

            // 6. Recognize one utterance and assess it.
            var result = await recognizer.RecognizeOnceAsync();
            stopwatch.Stop();

            // 7. Inspect the outcome.
            if (result.Reason == ResultReason.NoMatch)
            {
                throw new InvalidOperationException("未檢測到語音，請確保音頻清晰並重新錄製");
            }

            if (result.Reason != ResultReason.RecognizedSpeech)
            {
                if (result.Reason == ResultReason.Canceled)
                {
                    // Surface the service-side cause (auth, quota, network...) in the log;
                    // the exception message itself is left unchanged for callers.
                    var cancellation = CancellationDetails.FromResult(result);
                    _logger.LogWarning(
                        "語音識別被取消: Reason={Reason}, ErrorCode={ErrorCode}, ErrorDetails={ErrorDetails}",
                        cancellation.Reason, cancellation.ErrorCode, cancellation.ErrorDetails);
                }

                throw new InvalidOperationException($"語音識別失敗: {result.Reason}");
            }

            var pronunciationResult = PronunciationAssessmentResult.FromResult(result);

            // The SDK's aggregate score; AccuracyScore is only one of its components.
            var overallScore = pronunciationResult.PronunciationScore;

            // 8. Convert to the application's result format.
            var assessmentResult = new PronunciationResult
            {
                AssessmentId = Guid.NewGuid().ToString(),
                FlashcardId = flashcardId,
                ReferenceText = referenceText,
                TranscribedText = result.Text,
                Scores = new PronunciationScores
                {
                    Overall = overallScore,
                    Accuracy = pronunciationResult.AccuracyScore,
                    Fluency = pronunciationResult.FluencyScore,
                    Completeness = pronunciationResult.CompletenessScore,
                    Prosody = pronunciationResult.ProsodyScore
                },
                ProcessingTime = stopwatch.ElapsedMilliseconds,
                ConfidenceLevel = MapScoreToConfidence(overallScore)
            };

            // 9. Per-word results, when the SDK returned any.
            if (pronunciationResult.Words != null)
            {
                assessmentResult.WordLevelResults = pronunciationResult.Words
                    .Select(word => new WordLevelResult
                    {
                        Word = word.Word,
                        AccuracyScore = word.AccuracyScore,
                        ErrorType = word.ErrorType.ToString()
                    })
                    .ToList();
            }

            // 10. Generate learner-facing feedback.
            assessmentResult.Feedback = GenerateFeedback(
                assessmentResult.Scores,
                assessmentResult.WordLevelResults,
                prosodyAssessed);

            _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
                overallScore, stopwatch.ElapsedMilliseconds);

            return assessmentResult;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "發音評估錯誤: FlashcardId={FlashcardId}", flashcardId);
            throw;
        }
    }

    /// <summary>
    /// Lightweight availability check: verifies that a subscription key and region are
    /// configured and that the SDK accepts them when building a <see cref="SpeechConfig"/>.
    /// No request is sent to the service.
    /// </summary>
    /// <returns><c>true</c> when the configuration looks usable; otherwise <c>false</c>.</returns>
    public Task<bool> IsServiceAvailableAsync()
    {
        try
        {
            if (string.IsNullOrEmpty(_options.SubscriptionKey) || string.IsNullOrEmpty(_options.Region))
            {
                _logger.LogWarning("Azure Speech Services 未配置");
                return Task.FromResult(false);
            }

            // Simple check: the SDK must be able to build a config from the stored credentials.
            var speechConfig = SpeechConfig.FromSubscription(_options.SubscriptionKey, _options.Region);
            return Task.FromResult(!string.IsNullOrEmpty(speechConfig.Region));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "檢查 Azure Speech Services 可用性時發生錯誤");
            return Task.FromResult(false);
        }
    }

    /// <summary>
    /// Reads <paramref name="source"/> to its end and writes every chunk into the SDK push stream.
    /// The caller is responsible for closing <paramref name="destination"/> afterwards.
    /// </summary>
    private static async Task CopyAudioAsync(Stream source, PushAudioInputStream destination)
    {
        var buffer = new byte[CopyBufferSize];
        int bytesRead;
        while ((bytesRead = await source.ReadAsync(buffer, 0, buffer.Length)) > 0)
        {
            destination.Write(buffer, bytesRead);
        }
    }

    /// <summary>
    /// Maps a 0-100 overall score to a coarse confidence level:
    /// 2 for scores of at least 85, 1 for at least 70, otherwise 0.
    /// </summary>
    private static int MapScoreToConfidence(double overallScore)
    {
        return overallScore switch
        {
            >= 85 => 2, // excellent (high confidence)
            >= 70 => 1, // good (medium confidence)
            _ => 0      // needs improvement (low confidence)
        };
    }

    /// <summary>
    /// Builds the learner-facing feedback messages from the scores and per-word results.
    /// </summary>
    /// <param name="scores">Aggregate scores for the utterance.</param>
    /// <param name="wordResults">Per-word results; <c>null</c> is treated as "no word data".</param>
    /// <param name="prosodyAssessed">
    /// Whether prosody assessment was requested. When <c>false</c> the prosody hint is skipped,
    /// because the prosody score does not reflect an actual assessment.
    /// </param>
    /// <returns>One headline message followed by zero or more targeted hints.</returns>
    private static List<string> GenerateFeedback(
        PronunciationScores scores,
        List<WordLevelResult> wordResults,
        bool prosodyAssessed = true)
    {
        var feedback = new List<string>();

        // Headline message based on the overall score.
        if (scores.Overall >= 90)
        {
            feedback.Add("🎉 發音表現優秀！");
        }
        else if (scores.Overall >= 80)
        {
            feedback.Add("👍 發音表現良好");
        }
        else if (scores.Overall >= 70)
        {
            feedback.Add("📈 發音有進步空間");
        }
        else
        {
            feedback.Add("💪 建議多加練習發音");
        }

        // Targeted hints per dimension.
        if (scores.Accuracy < NeedsWorkThreshold)
        {
            feedback.Add("注意發音準確度，可以多聽標準發音範例");
        }

        if (scores.Fluency < NeedsWorkThreshold)
        {
            feedback.Add("嘗試讓語速更自然流暢");
        }

        if (prosodyAssessed && scores.Prosody < NeedsWorkThreshold)
        {
            feedback.Add("注意語調和重音的掌握");
        }

        // Word-level hint: name at most three low-scoring words.
        if (wordResults is not null)
        {
            var problemWords = wordResults
                .Where(w => w.AccuracyScore < NeedsWorkThreshold)
                .Take(3)
                .Select(w => $"'{w.Word}'")
                .ToList();

            if (problemWords.Count > 0)
            {
                var wordList = string.Join("、", problemWords);
                feedback.Add($"重點練習: {wordList}");
            }
        }

        return feedback;
    }
}