dramaling-vocab-learning/backend/DramaLing.Api/Controllers/AudioController.cs

218 lines
6.9 KiB
C#

using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Authorization;
using DramaLing.Api.Models.Dtos;
using DramaLing.Api.Services;
namespace DramaLing.Api.Controllers;
/// <summary>
/// HTTP endpoints for text-to-speech generation, cached-audio lookup,
/// pronunciation evaluation and listing the available TTS voices.
/// All actions require an authenticated caller (see <c>[Authorize]</c>).
/// </summary>
[Route("api/[controller]")]
[Authorize]
public class AudioController : BaseController
{
    // Validation limits. The user-facing error messages below spell these
    // values out literally, so keep the messages in sync when changing them.
    private const int MaxTextLength = 1000;
    private const float MinSpeed = 0.5f;
    private const float MaxSpeed = 2.0f;
    private const long MaxAudioFileBytes = 10 * 1024 * 1024;

    // Media types accepted for pronunciation uploads. Media type names are
    // case-insensitive, hence the ordinal ignore-case comparer.
    private static readonly HashSet<string> AllowedAudioContentTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "audio/wav",
        "audio/mp3",
        "audio/mpeg",
        "audio/ogg"
    };

    private readonly IAudioCacheService _audioCacheService;
    private readonly IAzureSpeechService _speechService;

    /// <summary>
    /// Creates the controller with the services it delegates to.
    /// </summary>
    /// <param name="audioCacheService">Service used to fetch or create TTS audio.</param>
    /// <param name="speechService">Service used to evaluate pronunciation.</param>
    /// <param name="logger">Logger forwarded to the base controller.</param>
    public AudioController(
        IAudioCacheService audioCacheService,
        IAzureSpeechService speechService,
        ILogger<AudioController> logger) : base(logger)
    {
        _audioCacheService = audioCacheService;
        _speechService = speechService;
    }

    /// <summary>
    /// Generate audio from text using TTS.
    /// </summary>
    /// <param name="request">TTS request parameters (text, accent, speed).</param>
    /// <returns>
    /// 400 with a <c>TTSResponse</c> error when validation fails; a 500 error
    /// when the audio service reports an error or an exception is thrown;
    /// otherwise the service response wrapped by <c>SuccessResponse</c>.
    /// </returns>
    [HttpPost("tts")]
    public async Task<IActionResult> GenerateAudio([FromBody] TTSRequest request)
    {
        // A missing or unbindable body can leave the model null. Guard here so
        // neither the validation below nor the catch handler dereferences it.
        if (request is null)
        {
            return BadRequest(new TTSResponse
            {
                Error = "Request body is required"
            });
        }

        try
        {
            if (string.IsNullOrWhiteSpace(request.Text))
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Text is required"
                });
            }

            if (request.Text.Length > MaxTextLength)
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Text is too long (max 1000 characters)"
                });
            }

            if (!IsValidAccent(request.Accent))
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Invalid accent. Use 'us' or 'uk'"
                });
            }

            if (request.Speed < MinSpeed || request.Speed > MaxSpeed)
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Speed must be between 0.5 and 2.0"
                });
            }

            var response = await _audioCacheService.GetOrCreateAudioAsync(request);

            // The service signals failure through the Error property rather
            // than by throwing.
            if (!string.IsNullOrEmpty(response.Error))
            {
                return ErrorResponse("TTS_ERROR", response.Error, null, 500);
            }

            return SuccessResponse(response);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error generating audio for text: {Text}", request.Text);
            return StatusCode(500, new TTSResponse
            {
                Error = "Internal server error"
            });
        }
    }

    /// <summary>
    /// Get cached audio by hash.
    /// </summary>
    /// <param name="hash">Audio cache hash.</param>
    /// <returns>
    /// Currently always 404 with a <c>TTSResponse</c> error: the cache lookup
    /// has not been implemented yet.
    /// </returns>
    [HttpGet("tts/cache/{hash}")]
    public Task<ActionResult<TTSResponse>> GetCachedAudio(string hash)
    {
        // TODO: implement the cache lookup; this should query the database
        // for the cached audio identified by `hash`. Until then nothing here
        // awaits or can throw, so the method is a plain synchronous stub.
        ActionResult<TTSResponse> notFound = NotFound(new TTSResponse
        {
            Error = "Audio not found in cache"
        });

        return Task.FromResult(notFound);
    }

    /// <summary>
    /// Evaluate pronunciation from uploaded audio.
    /// </summary>
    /// <param name="audioFile">Uploaded audio file (WAV, MP3 or OGG, at most 10MB).</param>
    /// <param name="targetText">Target text for pronunciation.</param>
    /// <param name="userLevel">User's CEFR level; defaults to "B1".</param>
    /// <returns>
    /// 400 with a <c>PronunciationResponse</c> error when validation fails;
    /// 500 when the speech service reports an error or an exception is thrown;
    /// otherwise 200 with the assessment result.
    /// </returns>
    [HttpPost("pronunciation/evaluate")]
    public async Task<ActionResult<PronunciationResponse>> EvaluatePronunciation(
        IFormFile audioFile,
        [FromForm] string targetText,
        [FromForm] string userLevel = "B1")
    {
        try
        {
            if (audioFile is null || audioFile.Length == 0)
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Audio file is required"
                });
            }

            if (string.IsNullOrWhiteSpace(targetText))
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Target text is required"
                });
            }

            // Check the file size (max 10MB).
            if (audioFile.Length > MaxAudioFileBytes)
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Audio file is too large (max 10MB)"
                });
            }

            // Check the file type.
            if (!IsAllowedAudioContentType(audioFile.ContentType))
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Invalid audio format. Use WAV, MP3, or OGG"
                });
            }

            using var audioStream = audioFile.OpenReadStream();
            var request = new PronunciationRequest
            {
                TargetText = targetText,
                UserLevel = userLevel
            };

            var response = await _speechService.EvaluatePronunciationAsync(audioStream, request);

            // The service signals failure through the Error property rather
            // than by throwing.
            if (!string.IsNullOrEmpty(response.Error))
            {
                return StatusCode(500, response);
            }

            return Ok(response);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error evaluating pronunciation for text: {Text}", targetText);
            return StatusCode(500, new PronunciationResponse
            {
                Error = "Internal server error"
            });
        }
    }

    /// <summary>
    /// Get supported voices for TTS.
    /// </summary>
    /// <returns>A fixed list of voices grouped by accent (US and UK).</returns>
    [HttpGet("voices")]
    public ActionResult<object> GetVoices()
    {
        var voices = new
        {
            US = new[]
            {
                new { Id = "en-US-AriaNeural", Name = "Aria", Gender = "Female" },
                new { Id = "en-US-GuyNeural", Name = "Guy", Gender = "Male" },
                new { Id = "en-US-JennyNeural", Name = "Jenny", Gender = "Female" }
            },
            UK = new[]
            {
                new { Id = "en-GB-SoniaNeural", Name = "Sonia", Gender = "Female" },
                new { Id = "en-GB-RyanNeural", Name = "Ryan", Gender = "Male" },
                new { Id = "en-GB-LibbyNeural", Name = "Libby", Gender = "Female" }
            }
        };

        return Ok(voices);
    }

    /// <summary>
    /// Returns true when <paramref name="accent"/> is "us" or "uk", ignoring
    /// case. Null yields false.
    /// </summary>
    private static bool IsValidAccent(string accent)
    {
        // Ordinal comparison: accents are protocol identifiers, so the match
        // must not depend on the server's current culture.
        return string.Equals(accent, "us", StringComparison.OrdinalIgnoreCase)
            || string.Equals(accent, "uk", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Returns true when the media type part of <paramref name="contentType"/>
    /// is one of the accepted audio types. Parameters after ';' (for example
    /// "audio/ogg; codecs=opus") and letter case are ignored.
    /// </summary>
    private static bool IsAllowedAudioContentType(string contentType)
    {
        if (string.IsNullOrWhiteSpace(contentType))
        {
            return false;
        }

        var parameterStart = contentType.IndexOf(';');
        var mediaType = parameterStart >= 0 ? contentType[..parameterStart] : contentType;

        return AllowedAudioContentTypes.Contains(mediaType.Trim());
    }
}