using System.Security.Cryptography;
using System.Text;
using DramaLing.Api.Models.Dtos;

namespace DramaLing.Api.Services;

/// <summary>
/// Speech operations exposed to the rest of the API: text-to-speech generation
/// and pronunciation evaluation.
/// </summary>
public interface IAzureSpeechService
{
    /// <summary>Produces audio for the text carried by <paramref name="request"/>.</summary>
    /// <param name="request">The text-to-speech request.</param>
    /// <returns>A response holding either the audio data or an error message.</returns>
    Task<TTSResponse> GenerateAudioAsync(TTSRequest request);

    /// <summary>Scores a recording against the target text carried by <paramref name="request"/>.</summary>
    /// <param name="audioStream">The recorded audio to evaluate.</param>
    /// <param name="request">The pronunciation request, including the target text.</param>
    /// <returns>A response holding either the scores or an error message.</returns>
    Task<PronunciationResponse> EvaluatePronunciationAsync(Stream audioStream, PronunciationRequest request);
}

/// <summary>
/// <see cref="IAzureSpeechService"/> implementation that currently returns simulated
/// results: no Azure API call is made in this class. The Azure settings are only
/// checked for presence; when they are missing every operation returns an error response.
/// </summary>
public class AzureSpeechService : IAzureSpeechService
{
    private const string SubscriptionKeySetting = "Azure:Speech:SubscriptionKey";
    private const string RegionSetting = "Azure:Speech:Region";

    private const string UkVoice = "en-GB-SoniaNeural";
    private const string UsVoice = "en-US-AriaNeural";

    // Only the first few words of the target text get a mock phoneme entry.
    private const int MaxMockPhonemeWords = 3;

    // Duration estimate: 0.1 seconds per character, never less than one second.
    private const float SecondsPerCharacter = 0.1f;
    private const float MinimumDurationSeconds = 1.0f;

    // Artificial delays standing in for the latency of the real API calls.
    private static readonly TimeSpan SimulatedTtsLatency = TimeSpan.FromMilliseconds(500);
    private static readonly TimeSpan SimulatedEvaluationLatency = TimeSpan.FromMilliseconds(2000);

    // Mock base64 audio payload (effectively an empty MP3 header), built once.
    private static readonly string MockAudioDataUrl =
        "data:audio/mp3;base64," + Convert.ToBase64String(
            new byte[] { 0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 });

    private readonly IConfiguration _configuration;
    private readonly ILogger _logger;
    private readonly bool _isConfigured;

    /// <summary>
    /// Reads the Azure Speech subscription key and region from configuration and
    /// records whether both are present.
    /// </summary>
    /// <param name="configuration">Application configuration.</param>
    /// <param name="logger">Logger used for diagnostics.</param>
    // NOTE(review): generic type arguments appear to have been stripped from this file;
    // this parameter was probably ILogger<AzureSpeechService> — confirm against the DI registration.
    public AzureSpeechService(IConfiguration configuration, ILogger logger)
    {
        _configuration = configuration;
        _logger = logger;

        var subscriptionKey = _configuration[SubscriptionKeySetting];
        var region = _configuration[RegionSetting];

        if (string.IsNullOrEmpty(subscriptionKey) || string.IsNullOrEmpty(region))
        {
            _logger.LogWarning("Azure Speech configuration is missing. TTS functionality will be disabled.");
            _isConfigured = false;
            return;
        }

        _isConfigured = true;
        _logger.LogInformation("Azure Speech service configured for region: {Region}", region);
    }

    /// <summary>
    /// Simulates text-to-speech: waits briefly and returns a fixed mock audio data URL
    /// with a duration estimated from the text length.
    /// </summary>
    /// <param name="request">The request whose <c>Text</c> is to be spoken.</param>
    /// <returns>
    /// A response with <c>AudioUrl</c>, <c>Duration</c> and <c>CacheHit = false</c> on success;
    /// otherwise a response whose <c>Error</c> is set. Exceptions are logged, not propagated.
    /// </returns>
    public async Task<TTSResponse> GenerateAudioAsync(TTSRequest request)
    {
        try
        {
            if (!_isConfigured)
            {
                return new TTSResponse { Error = "Azure Speech service is not configured" };
            }

            // Reject a missing request/text up front instead of failing after the delay.
            if (request?.Text is null)
            {
                return new TTSResponse { Error = "Text is required" };
            }

            // Simulated API latency.
            await Task.Delay(SimulatedTtsLatency);

            return new TTSResponse
            {
                AudioUrl = MockAudioDataUrl,
                Duration = CalculateAudioDuration(request.Text.Length),
                CacheHit = false
            };
        }
        catch (Exception ex)
        {
            // request may be null here; a plain dereference would throw out of the handler.
            _logger.LogError(ex, "Error generating audio for text: {Text}", request?.Text);
            return new TTSResponse { Error = "Internal error generating audio" };
        }
    }

    /// <summary>
    /// Simulates pronunciation evaluation: waits briefly and returns randomly generated scores.
    /// The audio stream is not read by this mock implementation.
    /// </summary>
    /// <param name="audioStream">The recorded audio (currently unused).</param>
    /// <param name="request">The request whose <c>TargetText</c> the recording is compared against.</param>
    /// <returns>
    /// A response with mock scores, phoneme scores and suggestions on success;
    /// otherwise a response whose <c>Error</c> is set. Exceptions are logged, not propagated.
    /// </returns>
    public async Task<PronunciationResponse> EvaluatePronunciationAsync(Stream audioStream, PronunciationRequest request)
    {
        try
        {
            if (!_isConfigured)
            {
                return new PronunciationResponse { Error = "Azure Speech service is not configured" };
            }

            // Reject a missing request/target text up front instead of failing after the delay.
            if (request?.TargetText is null)
            {
                return new PronunciationResponse { Error = "Target text is required" };
            }

            // Simulated API latency.
            await Task.Delay(SimulatedEvaluationLatency);

            // Mock overall score in the range 75..94.
            var overallScore = Random.Shared.Next(75, 95);

            return new PronunciationResponse
            {
                OverallScore = overallScore,
                Accuracy = NextMockSubScore(),
                Fluency = NextMockSubScore(),
                Completeness = NextMockSubScore(),
                Prosody = NextMockSubScore(),
                PhonemeScores = GenerateMockPhonemeScores(request.TargetText),
                Suggestions = GenerateMockSuggestions(overallScore)
            };
        }
        catch (Exception ex)
        {
            // request may be null here; a plain dereference would throw out of the handler.
            _logger.LogError(ex, "Error evaluating pronunciation for text: {Text}", request?.TargetText);
            return new PronunciationResponse { Error = "Internal error evaluating pronunciation" };
        }
    }

    /// <summary>Returns a random mock sub-score in the range [75, 95).</summary>
    private static float NextMockSubScore() => (float)(Random.Shared.NextDouble() * 20 + 75);

    /// <summary>
    /// Builds one mock phoneme entry for each of the first <see cref="MaxMockPhonemeWords"/>
    /// space-separated words of <paramref name="text"/>, labelled with the word's first character.
    /// </summary>
    /// <param name="text">The target text; must not be null.</param>
    /// <returns>A list with at most <see cref="MaxMockPhonemeWords"/> entries.</returns>
    private static List<PhonemeScore> GenerateMockPhonemeScores(string text)
    {
        // RemoveEmptyEntries guarantees every word has at least one character, so word[0] is safe.
        return text
            .Split(' ', StringSplitOptions.RemoveEmptyEntries)
            .Take(MaxMockPhonemeWords)
            .Select(word => new PhonemeScore
            {
                Phoneme = $"/{word[0]}/",
                Score = Random.Shared.Next(70, 95),
                // Roughly one entry in three carries a suggestion.
                Suggestion = Random.Shared.Next(0, 3) == 0 ? $"注意 {word} 的發音" : null
            })
            .ToList();
    }

    /// <summary>Selects canned feedback messages based on the overall score thresholds.</summary>
    /// <param name="overallScore">The overall pronunciation score.</param>
    /// <returns>Zero or more suggestion strings.</returns>
    private static List<string> GenerateMockSuggestions(int overallScore)
    {
        var suggestions = new List<string>();

        if (overallScore < 85)
        {
            suggestions.Add("注意單詞的重音位置");
        }

        if (overallScore < 80)
        {
            suggestions.Add("發音可以更清晰一些");
            suggestions.Add("嘗試放慢語速,確保每個音都發準");
        }

        if (overallScore >= 90)
        {
            suggestions.Add("發音很棒!繼續保持");
        }

        return suggestions;
    }

    /// <summary>
    /// Maps an accent code to a voice name: "uk" (any casing) selects the British voice,
    /// anything else (including null) selects the US voice.
    /// </summary>
    /// <param name="accent">Accent code such as "uk" or "us".</param>
    /// <param name="voicePreference">Currently not used in the selection.</param>
    /// <returns>The voice name.</returns>
    private static string GetVoiceName(string accent, string voicePreference)
    {
        // Ordinal comparison: accent codes are identifiers, not linguistic text.
        return string.Equals(accent, "uk", StringComparison.OrdinalIgnoreCase)
            ? UkVoice
            : UsVoice;
    }

    /// <summary>
    /// Builds an SSML document that speaks <paramref name="text"/> with the given voice,
    /// at a rate of "slow" (speed below 0.8), "fast" (speed above 1.2) or "medium".
    /// </summary>
    /// <param name="text">The text to speak; XML-escaped before embedding.</param>
    /// <param name="voice">The voice name; XML-escaped before embedding.</param>
    /// <param name="speed">Relative speed used to pick the prosody rate.</param>
    /// <returns>The SSML markup.</returns>
    // NOTE(review): the markup of the original template appears to have been lost (only the
    // text placeholder remained, leaving voice and rate unused). The envelope below follows
    // the standard SSML speak/voice/prosody structure — confirm attribute values (notably
    // xml:lang) before wiring this into a real request.
    private static string CreateSSML(string text, string voice, float speed)
    {
        var rate = speed switch
        {
            < 0.8f => "slow",
            > 1.2f => "fast",
            _ => "medium"
        };

        // Escape user-supplied values so characters like '&' or '<' cannot break the document.
        var safeText = System.Security.SecurityElement.Escape(text);
        var safeVoice = System.Security.SecurityElement.Escape(voice);

        return $@"
<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
  <voice name='{safeVoice}'>
    <prosody rate='{rate}'>
      {safeText}
    </prosody>
  </voice>
</speak>";
    }

    /// <summary>
    /// Estimates audio duration from text length at 0.1 seconds per character,
    /// with a floor of one second.
    /// </summary>
    /// <param name="textLength">Number of characters in the text.</param>
    /// <returns>The estimated duration in seconds.</returns>
    private static float CalculateAudioDuration(int textLength)
    {
        return Math.Max(MinimumDurationSeconds, textLength * SecondsPerCharacter);
    }
}