// Listing metadata: 191 lines, 6.1 KiB, C#
using DramaLing.Api.Models.Dtos;
|
|
using System.Text;
|
|
using System.Security.Cryptography;
|
|
|
|
namespace DramaLing.Api.Services;
|
|
|
|
/// <summary>
/// Speech operations offered to the rest of the API: text-to-speech generation
/// and pronunciation evaluation.
/// </summary>
public interface IAzureSpeechService
{
    /// <summary>Produces a text-to-speech response for the given request.</summary>
    /// <param name="request">The text-to-speech request.</param>
    /// <returns>A task that yields the <see cref="TTSResponse"/> for the request.</returns>
    Task<TTSResponse> GenerateAudioAsync(TTSRequest request);

    /// <summary>Evaluates a pronunciation attempt.</summary>
    /// <param name="audioStream">Stream carrying the audio to evaluate.</param>
    /// <param name="request">The pronunciation evaluation request.</param>
    /// <returns>A task that yields the <see cref="PronunciationResponse"/> for the request.</returns>
    Task<PronunciationResponse> EvaluatePronunciationAsync(Stream audioStream, PronunciationRequest request);
}
|
|
|
|
public class AzureSpeechService : IAzureSpeechService
|
|
{
|
|
private readonly IConfiguration _configuration;
|
|
private readonly ILogger<AzureSpeechService> _logger;
|
|
private readonly bool _isConfigured;
|
|
|
|
/// <summary>
/// Creates the service and records whether the Azure Speech settings are present.
/// </summary>
/// <param name="configuration">
/// Configuration source read for <c>Azure:Speech:SubscriptionKey</c> and <c>Azure:Speech:Region</c>.
/// </param>
/// <param name="logger">Logger that receives the configuration status message.</param>
public AzureSpeechService(IConfiguration configuration, ILogger<AzureSpeechService> logger)
{
    _configuration = configuration;
    _logger = logger;

    var speechKey = _configuration["Azure:Speech:SubscriptionKey"];
    var speechRegion = _configuration["Azure:Speech:Region"];

    // Both settings are required; a missing or empty value leaves the service disabled.
    _isConfigured = !string.IsNullOrEmpty(speechKey) && !string.IsNullOrEmpty(speechRegion);

    if (_isConfigured)
    {
        _logger.LogInformation("Azure Speech service configured for region: {Region}", speechRegion);
    }
    else
    {
        _logger.LogWarning("Azure Speech configuration is missing. TTS functionality will be disabled.");
    }
}
|
|
|
|
/// <summary>
/// Produces a text-to-speech response for <paramref name="request"/>.
/// </summary>
/// <remarks>
/// This implementation is a mock: it waits 500 ms and returns a fixed, tiny base64
/// payload wrapped in a data URL rather than calling a speech API.
/// </remarks>
/// <param name="request">Request whose <c>Text</c> length is used to estimate the duration.</param>
/// <returns>
/// A response with <c>AudioUrl</c>, <c>Duration</c> and <c>CacheHit</c> set on success, or a
/// response with only <c>Error</c> set when the service is not configured or an exception occurs.
/// </returns>
public async Task<TTSResponse> GenerateAudioAsync(TTSRequest request)
{
    try
    {
        if (!_isConfigured)
        {
            return new TTSResponse
            {
                Error = "Azure Speech service is not configured"
            };
        }

        // Simulate TTS processing and return mock data.
        await Task.Delay(500); // simulated API latency

        // Mock base64 audio data (effectively just an empty MP3 header).
        var mockAudioData = Convert.ToBase64String(new byte[] {
            0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
        });
        var audioUrl = $"data:audio/mp3;base64,{mockAudioData}";

        return new TTSResponse
        {
            AudioUrl = audioUrl,
            Duration = CalculateAudioDuration(request.Text.Length),
            CacheHit = false
        };
    }
    catch (Exception ex)
    {
        // A null request is one of the ways to end up here, so it must not be
        // dereferenced unguarded: doing so would throw out of the handler and
        // bypass the error response below.
        _logger.LogError(ex, "Error generating audio for text: {Text}", request?.Text);
        return new TTSResponse
        {
            Error = "Internal error generating audio"
        };
    }
}
|
|
|
|
/// <summary>
/// Produces a pronunciation evaluation for <paramref name="request"/>.
/// </summary>
/// <remarks>
/// This implementation is a mock: it waits 2 seconds and returns randomly generated
/// scores. <paramref name="audioStream"/> is not read.
/// </remarks>
/// <param name="audioStream">Audio to evaluate (currently unused by the mock).</param>
/// <param name="request">Request whose <c>TargetText</c> drives the mock phoneme scores.</param>
/// <returns>
/// A response populated with mock scores and suggestions on success, or a response with
/// only <c>Error</c> set when the service is not configured or an exception occurs.
/// </returns>
public async Task<PronunciationResponse> EvaluatePronunciationAsync(Stream audioStream, PronunciationRequest request)
{
    try
    {
        if (!_isConfigured)
        {
            return new PronunciationResponse
            {
                Error = "Azure Speech service is not configured"
            };
        }

        // Simulate the pronunciation assessment.
        await Task.Delay(2000); // simulated API call latency

        // Generate mock scores. Random.Shared avoids allocating a generator per call
        // and matches the helper that builds the phoneme scores.
        var overallScore = Random.Shared.Next(75, 95);

        return new PronunciationResponse
        {
            OverallScore = overallScore,
            Accuracy = NextMockScore(),
            Fluency = NextMockScore(),
            Completeness = NextMockScore(),
            Prosody = NextMockScore(),
            PhonemeScores = GenerateMockPhonemeScores(request.TargetText),
            Suggestions = GenerateMockSuggestions(overallScore)
        };
    }
    catch (Exception ex)
    {
        // A null request is one of the ways to end up here, so it must not be
        // dereferenced unguarded: doing so would throw out of the handler and
        // bypass the error response below.
        _logger.LogError(ex, "Error evaluating pronunciation for text: {Text}", request?.TargetText);
        return new PronunciationResponse
        {
            Error = "Internal error evaluating pronunciation"
        };
    }

    // Random value in [75, 95), the same formula used for every sub-score.
    static float NextMockScore() => (float)(Random.Shared.NextDouble() * 20 + 75);
}
|
|
|
|
/// <summary>
/// Builds mock phoneme scores from the first three space-separated words of
/// <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to split on spaces; empty entries are dropped.</param>
/// <returns>
/// One entry per processed word: the word's first character wrapped in slashes, a random
/// score, and — for roughly one draw in three — a suggestion mentioning the word.
/// </returns>
private List<PhonemeScore> GenerateMockPhonemeScores(string text)
{
    return text
        .Split(' ', StringSplitOptions.RemoveEmptyEntries)
        .Take(3) // only the first three words are processed
        .Select(token =>
        {
            var leadingSound = $"/{token[0]}/";
            var mockScore = Random.Shared.Next(70, 95);
            var hint = Random.Shared.Next(0, 3) == 0 ? $"注意 {token} 的發音" : null;

            return new PhonemeScore
            {
                Phoneme = leadingSound,
                Score = mockScore,
                Suggestion = hint
            };
        })
        .ToList();
}
|
|
|
|
/// <summary>
/// Maps an overall score to a list of canned feedback messages.
/// </summary>
/// <param name="overallScore">Overall pronunciation score.</param>
/// <returns>
/// Three messages below 80, one message for 80–84, none for 85–89, and one
/// message of praise for 90 and above.
/// </returns>
private List<string> GenerateMockSuggestions(int overallScore)
{
    // Bands are checked from the lowest up; a score below 80 is also below 85,
    // so that band carries the stress-placement hint as well.
    return overallScore switch
    {
        < 80 => new List<string>
        {
            "注意單詞的重音位置",
            "發音可以更清晰一些",
            "嘗試放慢語速,確保每個音都發準"
        },
        < 85 => new List<string> { "注意單詞的重音位置" },
        >= 90 => new List<string> { "發音很棒!繼續保持" },
        _ => new List<string>()
    };
}
|
|
|
|
/// <summary>
/// Picks the neural voice name for an accent code.
/// </summary>
/// <param name="accent">
/// Accent code, matched case-insensitively. <c>"uk"</c> selects the British voice;
/// anything else, including <c>null</c>, selects the US voice.
/// </param>
/// <param name="voicePreference">Currently not used when choosing the voice.</param>
/// <returns>The voice name to use.</returns>
private string GetVoiceName(string accent, string voicePreference)
{
    // Null-safe and culture-invariant: accent codes are identifiers, not
    // linguistic text, so lowercasing must not depend on the current culture.
    return accent?.ToLowerInvariant() switch
    {
        "uk" => "en-GB-SoniaNeural",
        "us" => "en-US-AriaNeural",
        _ => "en-US-AriaNeural"
    };
}
|
|
|
|
/// <summary>
/// Builds the SSML document that speaks <paramref name="text"/> with the given voice and rate.
/// </summary>
/// <param name="text">Text to speak. XML-escaped before being embedded.</param>
/// <param name="voice">Voice name placed in the <c>voice</c> element. XML-escaped as well.</param>
/// <param name="speed">
/// Speed factor: below 0.8 maps to <c>slow</c>, above 1.2 to <c>fast</c>, anything else to <c>medium</c>.
/// </param>
/// <returns>The SSML markup as a string.</returns>
private string CreateSSML(string text, string voice, float speed)
{
    var rate = speed switch
    {
        < 0.8f => "slow",
        > 1.2f => "fast",
        _ => "medium"
    };

    // Escape XML metacharacters so that characters such as '<', '&' or a quote in
    // the input cannot produce malformed SSML or inject extra markup.
    var safeText = System.Security.SecurityElement.Escape(text);
    var safeVoice = System.Security.SecurityElement.Escape(voice);

    return $@"
<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
<voice name='{safeVoice}'>
<prosody rate='{rate}'>
{safeText}
</prosody>
</voice>
</speak>";
}
|
|
|
|
/// <summary>
/// Estimates the audio duration from the text length.
/// </summary>
/// <param name="textLength">Number of characters in the text.</param>
/// <returns>0.1 seconds per character, but never less than 1 second.</returns>
private float CalculateAudioDuration(int textLength)
{
    const float SecondsPerCharacter = 0.1f; // average time assumed per character
    const float MinimumSeconds = 1.0f;

    var estimate = textLength * SecondsPerCharacter;
    return estimate > MinimumSeconds ? estimate : MinimumSeconds;
}
|
|
} |