218 lines
6.9 KiB
C#
218 lines
6.9 KiB
C#
using Microsoft.AspNetCore.Mvc;
|
|
using Microsoft.AspNetCore.Authorization;
|
|
using DramaLing.Api.Models.Dtos;
|
|
using DramaLing.Api.Services;
|
|
|
|
namespace DramaLing.Api.Controllers;
|
|
|
|
/// <summary>
/// API endpoints for text-to-speech (TTS) audio generation, cached-audio lookup,
/// pronunciation evaluation, and listing the selectable TTS voices.
/// </summary>
[Route("api/[controller]")]
[Authorize]
public class AudioController : BaseController
{
    // Validation limits. The same values are spelled out in the user-facing
    // error messages below, so keep the two in sync when changing them.
    private const int MaxTextLength = 1000;
    private const float MinSpeed = 0.5f;
    private const float MaxSpeed = 2.0f;
    private const long MaxAudioFileBytes = 10 * 1024 * 1024; // 10 MB

    // Media types accepted for pronunciation uploads. Media types are
    // case-insensitive, hence the comparer.
    private static readonly HashSet<string> AllowedAudioTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "audio/wav",
        "audio/mp3",
        "audio/mpeg",
        "audio/ogg"
    };

    // The voice catalogue is constant data, so it is built once instead of
    // being re-allocated on every request.
    private static readonly object SupportedVoices = new
    {
        US = new[]
        {
            new { Id = "en-US-AriaNeural", Name = "Aria", Gender = "Female" },
            new { Id = "en-US-GuyNeural", Name = "Guy", Gender = "Male" },
            new { Id = "en-US-JennyNeural", Name = "Jenny", Gender = "Female" }
        },
        UK = new[]
        {
            new { Id = "en-GB-SoniaNeural", Name = "Sonia", Gender = "Female" },
            new { Id = "en-GB-RyanNeural", Name = "Ryan", Gender = "Male" },
            new { Id = "en-GB-LibbyNeural", Name = "Libby", Gender = "Female" }
        }
    };

    private readonly IAudioCacheService _audioCacheService;
    private readonly IAzureSpeechService _speechService;

    /// <summary>
    /// Creates the controller with its injected services.
    /// </summary>
    /// <param name="audioCacheService">Service used to get or create TTS audio.</param>
    /// <param name="speechService">Service used to evaluate pronunciation.</param>
    /// <param name="logger">Logger forwarded to the base controller.</param>
    public AudioController(
        IAudioCacheService audioCacheService,
        IAzureSpeechService speechService,
        ILogger<AudioController> logger) : base(logger)
    {
        _audioCacheService = audioCacheService;
        _speechService = speechService;
    }

    /// <summary>
    /// Generate audio from text using TTS
    /// </summary>
    /// <param name="request">TTS request parameters</param>
    /// <returns>
    /// Audio URL and metadata on success; 400 with a <see cref="TTSResponse"/> error when
    /// validation fails; an error response when the audio service reports a failure or throws.
    /// </returns>
    [HttpPost("tts")]
    public async Task<IActionResult> GenerateAudio([FromBody] TTSRequest request)
    {
        // Guard outside the try block: the catch handler reads request.Text for
        // logging, so a null request must never reach it.
        if (request is null)
        {
            return BadRequest(new TTSResponse
            {
                Error = "Request body is required"
            });
        }

        try
        {
            if (string.IsNullOrWhiteSpace(request.Text))
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Text is required"
                });
            }

            if (request.Text.Length > MaxTextLength)
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Text is too long (max 1000 characters)"
                });
            }

            if (!IsValidAccent(request.Accent))
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Invalid accent. Use 'us' or 'uk'"
                });
            }

            if (request.Speed < MinSpeed || request.Speed > MaxSpeed)
            {
                return BadRequest(new TTSResponse
                {
                    Error = "Speed must be between 0.5 and 2.0"
                });
            }

            var response = await _audioCacheService.GetOrCreateAudioAsync(request);

            // The service signals failure through the Error property rather than by throwing.
            if (!string.IsNullOrEmpty(response.Error))
            {
                return ErrorResponse("TTS_ERROR", response.Error, null, 500);
            }

            return SuccessResponse(response);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error generating audio for text: {Text}", request.Text);
            return StatusCode(500, new TTSResponse
            {
                Error = "Internal server error"
            });
        }
    }

    /// <summary>
    /// Get cached audio by hash
    /// </summary>
    /// <param name="hash">Audio cache hash</param>
    /// <returns>
    /// Currently always 404 with a <see cref="TTSResponse"/> error, because the cache lookup
    /// has not been implemented yet.
    /// </returns>
    [HttpGet("tts/cache/{hash}")]
    public Task<ActionResult<TTSResponse>> GetCachedAudio(string hash)
    {
        // TODO: implement the cache lookup; this should query the database for
        // the cached audio. Until then every hash is reported as not found.
        // No awaitable work happens here, so a completed task is returned directly.
        return Task.FromResult<ActionResult<TTSResponse>>(NotFound(new TTSResponse
        {
            Error = "Audio not found in cache"
        }));
    }

    /// <summary>
    /// Evaluate pronunciation from uploaded audio
    /// </summary>
    /// <param name="audioFile">Audio file</param>
    /// <param name="targetText">Target text for pronunciation</param>
    /// <param name="userLevel">User's CEFR level</param>
    /// <returns>
    /// Pronunciation assessment results on success; 400 when the upload or text is invalid;
    /// 500 when the speech service reports an error or throws.
    /// </returns>
    [HttpPost("pronunciation/evaluate")]
    public async Task<ActionResult<PronunciationResponse>> EvaluatePronunciation(
        IFormFile audioFile,
        [FromForm] string targetText,
        [FromForm] string userLevel = "B1")
    {
        try
        {
            if (audioFile is null || audioFile.Length == 0)
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Audio file is required"
                });
            }

            if (string.IsNullOrWhiteSpace(targetText))
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Target text is required"
                });
            }

            // Check the file size (max 10 MB).
            if (audioFile.Length > MaxAudioFileBytes)
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Audio file is too large (max 10MB)"
                });
            }

            // Check the file type.
            if (!IsAllowedAudioType(audioFile.ContentType))
            {
                return BadRequest(new PronunciationResponse
                {
                    Error = "Invalid audio format. Use WAV, MP3, or OGG"
                });
            }

            using var audioStream = audioFile.OpenReadStream();
            var request = new PronunciationRequest
            {
                TargetText = targetText,
                UserLevel = userLevel
            };

            var response = await _speechService.EvaluatePronunciationAsync(audioStream, request);

            // The service signals failure through the Error property rather than by throwing.
            if (!string.IsNullOrEmpty(response.Error))
            {
                return StatusCode(500, response);
            }

            return Ok(response);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error evaluating pronunciation for text: {Text}", targetText);
            return StatusCode(500, new PronunciationResponse
            {
                Error = "Internal server error"
            });
        }
    }

    /// <summary>
    /// Get supported voices for TTS
    /// </summary>
    /// <returns>List of available voices</returns>
    [HttpGet("voices")]
    public ActionResult<object> GetVoices()
    {
        return Ok(SupportedVoices);
    }

    /// <summary>
    /// Returns true when <paramref name="accent"/> is "us" or "uk", ignoring case.
    /// A null accent is invalid.
    /// </summary>
    private static bool IsValidAccent(string accent)
    {
        // Ordinal comparison: these are protocol identifiers, not linguistic text,
        // so the check must not depend on the server's current culture.
        return string.Equals(accent, "us", StringComparison.OrdinalIgnoreCase)
            || string.Equals(accent, "uk", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Returns true when the media type portion of <paramref name="contentType"/> is one of
    /// the accepted audio types. Parameters after ';' (for example "codecs=opus") are ignored.
    /// </summary>
    private static bool IsAllowedAudioType(string contentType)
    {
        if (string.IsNullOrWhiteSpace(contentType))
        {
            return false;
        }

        // "audio/ogg; codecs=opus" -> "audio/ogg"
        var separatorIndex = contentType.IndexOf(';');
        var mediaType = separatorIndex >= 0 ? contentType[..separatorIndex] : contentType;

        return AllowedAudioTypes.Contains(mediaType.Trim());
    }
}