// Source: dramaling-vocab-learning/backend/DramaLing.Api/Services/Media/Audio/AzureSpeechService.cs

using DramaLing.Api.Models.Dtos;
using System.Text;
using System.Security.Cryptography;
namespace DramaLing.Api.Services;
/// <summary>
/// Speech operations exposed to the rest of the API: text-to-speech generation
/// and pronunciation evaluation.
/// </summary>
public interface IAzureSpeechService
{
    /// <summary>
    /// Produces a <see cref="TTSResponse"/> for the supplied text-to-speech request.
    /// </summary>
    /// <param name="request">The text-to-speech request to process.</param>
    /// <returns>A task that resolves to the text-to-speech result.</returns>
    Task<TTSResponse> GenerateAudioAsync(TTSRequest request);

    /// <summary>
    /// Produces a <see cref="PronunciationResponse"/> for a recording of the learner's speech.
    /// </summary>
    /// <param name="audioStream">Stream carrying the recorded audio.</param>
    /// <param name="request">The pronunciation request that accompanies the recording.</param>
    /// <returns>A task that resolves to the pronunciation evaluation result.</returns>
    Task<PronunciationResponse> EvaluatePronunciationAsync(
        Stream audioStream,
        PronunciationRequest request);
}
/// <summary>
/// Placeholder implementation of <see cref="IAzureSpeechService"/>.
/// </summary>
/// <remarks>
/// No request is sent to Azure from this class: both public operations wait for an
/// artificial delay and then return simulated data. The Azure configuration values are
/// only checked for presence; when either is missing, every operation returns a response
/// whose <c>Error</c> is set instead of simulated data.
/// </remarks>
public class AzureSpeechService : IAzureSpeechService
{
    // Artificial latencies used by the simulated operations.
    private const int TtsSimulatedLatencyMs = 500;
    private const int PronunciationSimulatedLatencyMs = 2000;

    // Range of the simulated overall score: [75, 95) because Random.Next excludes the upper bound.
    private const int MinOverallScore = 75;
    private const int MaxOverallScoreExclusive = 95;

    // Simulated sub-scores are drawn from [SubScoreFloor, SubScoreFloor + SubScoreSpread).
    private const double SubScoreFloor = 75;
    private const double SubScoreSpread = 20;

    // Only the first few words of the target text get a simulated phoneme score.
    private const int MaxScoredWords = 3;

    private const string NotConfiguredError = "Azure Speech service is not configured";

    private readonly IConfiguration _configuration;
    private readonly ILogger<AzureSpeechService> _logger;
    private readonly bool _isConfigured;

    /// <summary>
    /// Reads <c>Azure:Speech:SubscriptionKey</c> and <c>Azure:Speech:Region</c> and records
    /// whether both are present. A missing value is logged as a warning, not thrown.
    /// </summary>
    /// <param name="configuration">Application configuration to read the Azure settings from.</param>
    /// <param name="logger">Logger used for configuration and error messages.</param>
    public AzureSpeechService(IConfiguration configuration, ILogger<AzureSpeechService> logger)
    {
        _configuration = configuration;
        _logger = logger;

        var subscriptionKey = _configuration["Azure:Speech:SubscriptionKey"];
        var region = _configuration["Azure:Speech:Region"];

        _isConfigured = !string.IsNullOrEmpty(subscriptionKey) && !string.IsNullOrEmpty(region);

        if (_isConfigured)
        {
            _logger.LogInformation("Azure Speech service configured for region: {Region}", region);
        }
        else
        {
            _logger.LogWarning("Azure Speech configuration is missing. TTS functionality will be disabled.");
        }
    }

    /// <summary>
    /// Returns a simulated text-to-speech result for <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Request whose <c>Text</c> length drives the estimated duration.</param>
    /// <returns>
    /// A response carrying a mock <c>data:</c> URL and an estimated duration, or a response
    /// with <c>Error</c> set when the service is not configured or an exception occurred.
    /// </returns>
    public async Task<TTSResponse> GenerateAudioAsync(TTSRequest request)
    {
        try
        {
            if (!_isConfigured)
            {
                return new TTSResponse { Error = NotConfiguredError };
            }

            // Simulate the latency of a remote TTS call.
            await Task.Delay(TtsSimulatedLatencyMs);

            // Mock payload: a bare MP3 frame header followed by zero bytes, not playable audio.
            byte[] mockFrame = { 0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
            var mockAudioData = Convert.ToBase64String(mockFrame);

            return new TTSResponse
            {
                AudioUrl = $"data:audio/mp3;base64,{mockAudioData}",
                Duration = CalculateAudioDuration(request.Text.Length),
                CacheHit = false
            };
        }
        catch (Exception ex)
        {
            // request?.Text: a null request must not raise a second exception inside the handler.
            _logger.LogError(ex, "Error generating audio for text: {Text}", request?.Text);
            return new TTSResponse { Error = "Internal error generating audio" };
        }
    }

    /// <summary>
    /// Returns a simulated pronunciation evaluation for <paramref name="request"/>.
    /// </summary>
    /// <param name="audioStream">Recorded audio; not read by this simulated implementation.</param>
    /// <param name="request">Request whose <c>TargetText</c> is used to build mock phoneme scores.</param>
    /// <returns>
    /// A response filled with random scores and canned suggestions, or a response with
    /// <c>Error</c> set when the service is not configured or an exception occurred.
    /// </returns>
    public async Task<PronunciationResponse> EvaluatePronunciationAsync(Stream audioStream, PronunciationRequest request)
    {
        try
        {
            if (!_isConfigured)
            {
                return new PronunciationResponse { Error = NotConfiguredError };
            }

            // Simulate the latency of a remote pronunciation-assessment call.
            await Task.Delay(PronunciationSimulatedLatencyMs);

            var overallScore = Random.Shared.Next(MinOverallScore, MaxOverallScoreExclusive);

            return new PronunciationResponse
            {
                OverallScore = overallScore,
                Accuracy = NextSubScore(),
                Fluency = NextSubScore(),
                Completeness = NextSubScore(),
                Prosody = NextSubScore(),
                PhonemeScores = GenerateMockPhonemeScores(request.TargetText),
                Suggestions = GenerateMockSuggestions(overallScore)
            };
        }
        catch (Exception ex)
        {
            // request?.TargetText: a null request must not raise a second exception inside the handler.
            _logger.LogError(ex, "Error evaluating pronunciation for text: {Text}", request?.TargetText);
            return new PronunciationResponse { Error = "Internal error evaluating pronunciation" };
        }
    }

    /// <summary>Draws one simulated sub-score (accuracy, fluency, ...) as a float.</summary>
    private static float NextSubScore()
    {
        return (float)(Random.Shared.NextDouble() * SubScoreSpread + SubScoreFloor);
    }

    /// <summary>
    /// Builds one mock phoneme score for each of the first <see cref="MaxScoredWords"/>
    /// space-separated words of <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Target text; split on spaces with empty entries removed.</param>
    /// <returns>A list with at most <see cref="MaxScoredWords"/> entries.</returns>
    private static List<PhonemeScore> GenerateMockPhonemeScores(string text)
    {
        // RemoveEmptyEntries guarantees every word has at least one character, so word[0] is safe.
        return text
            .Split(' ', StringSplitOptions.RemoveEmptyEntries)
            .Take(MaxScoredWords)
            .Select(word => new PhonemeScore
            {
                Phoneme = $"/{word[0]}/",
                Score = Random.Shared.Next(70, 95),
                // Roughly one word in three gets a hint attached.
                Suggestion = Random.Shared.Next(0, 3) == 0 ? $"注意 {word} 的發音" : null
            })
            .ToList();
    }

    /// <summary>
    /// Picks canned feedback messages based on the overall score: one below 85,
    /// two more below 80, and a praise message at 90 or above.
    /// </summary>
    /// <param name="overallScore">The simulated overall score.</param>
    /// <returns>The selected messages; empty for scores from 85 to 89.</returns>
    private static List<string> GenerateMockSuggestions(int overallScore)
    {
        var suggestions = new List<string>();

        if (overallScore < 85)
        {
            suggestions.Add("注意單詞的重音位置");
        }

        if (overallScore < 80)
        {
            suggestions.Add("發音可以更清晰一些");
            suggestions.Add("嘗試放慢語速,確保每個音都發準");
        }

        if (overallScore >= 90)
        {
            suggestions.Add("發音很棒!繼續保持");
        }

        return suggestions;
    }

    /// <summary>
    /// Maps an accent code to a neural voice name. Not called by the simulated operations.
    /// </summary>
    /// <param name="accent">Accent code, compared case-insensitively; "uk" selects the en-GB voice.</param>
    /// <param name="voicePreference">Currently ignored.</param>
    /// <returns>The en-GB voice for "uk"; the en-US voice for everything else, including null.</returns>
    private static string GetVoiceName(string accent, string voicePreference)
    {
        // Invariant lowering keeps the match independent of the server culture;
        // a null accent falls through to the default voice instead of throwing.
        return accent?.ToLowerInvariant() switch
        {
            "uk" => "en-GB-SoniaNeural",
            _ => "en-US-AriaNeural"
        };
    }

    /// <summary>
    /// Builds an SSML document that speaks <paramref name="text"/> with the given voice and rate.
    /// Not called by the simulated operations.
    /// </summary>
    /// <param name="text">Text to speak; XML-escaped before being embedded.</param>
    /// <param name="voice">Voice name; XML-escaped before being embedded in the attribute.</param>
    /// <param name="speed">Below 0.8 maps to "slow", above 1.2 to "fast", anything else to "medium".</param>
    /// <returns>The SSML markup as a string.</returns>
    private static string CreateSSML(string text, string voice, float speed)
    {
        var rate = speed switch
        {
            < 0.8f => "slow",
            > 1.2f => "fast",
            _ => "medium"
        };

        // Escape <, >, &, " and ' so caller-supplied text cannot produce malformed SSML
        // or inject extra elements/attributes into the document.
        var safeText = System.Security.SecurityElement.Escape(text);
        var safeVoice = System.Security.SecurityElement.Escape(voice);

        return $@"
<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
<voice name='{safeVoice}'>
<prosody rate='{rate}'>
{safeText}
</prosody>
</voice>
</speak>";
    }

    /// <summary>
    /// Estimates audio duration from text length at 0.1 seconds per character,
    /// with a minimum of one second.
    /// </summary>
    /// <param name="textLength">Number of characters in the text.</param>
    /// <returns>The estimated duration in seconds.</returns>
    private static float CalculateAudioDuration(int textLength)
    {
        return Math.Max(1.0f, textLength * 0.1f);
    }
}