dramaling-vocab-learning/backend/DramaLing.Api/Controllers/SpeechController.cs

226 lines
9.3 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Authorization;
using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Contracts.Services.Speech;
using DramaLing.Api.Services;
namespace DramaLing.Api.Controllers;
/// <summary>
/// HTTP endpoints for speech features: pronunciation assessment of uploaded audio,
/// a diagnostic endpoint for multipart form binding, and a service availability check.
/// </summary>
/// <remarks>
/// <c>_logger</c>, <c>SuccessResponse</c> and <c>ErrorResponse</c> are not declared in this
/// class; they are presumably supplied by <see cref="BaseController"/>.
/// </remarks>
[Route("api/speech")]
[AllowAnonymous] // Temporarily open for testing; authentication can be added later.
[ApiExplorerSettings(IgnoreApi = true)] // Temporarily excluded from Swagger to avoid IFormFile-related issues.
public class SpeechController : BaseController
{
    /// <summary>Largest accepted audio upload, in bytes (10 MB).</summary>
    private const long MaxAudioBytes = 10 * 1024 * 1024;

    /// <summary>Smallest upload accepted as a usable recording, in bytes.</summary>
    private const long MinAudioBytes = 100;

    /// <summary>Maximum number of reference-text characters written to the log.</summary>
    private const int ReferenceTextLogLength = 50;

    /// <summary>
    /// Media types (without parameters) that are recognised as audio uploads.
    /// Anything else is only logged as a warning, not rejected.
    /// </summary>
    private static readonly HashSet<string> KnownAudioMediaTypes = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "audio/wav", "audio/webm", "audio/mp3", "audio/mpeg",
        "audio/ogg", "audio/mp4", "audio/x-wav", "audio/wave"
    };

    private readonly IPronunciationAssessmentService _assessmentService;

    /// <summary>
    /// Creates the controller with the pronunciation assessment service; the auth service
    /// and logger are forwarded to the base controller.
    /// </summary>
    public SpeechController(
        IPronunciationAssessmentService assessmentService,
        IAuthService authService,
        ILogger<SpeechController> logger) : base(logger, authService)
    {
        _assessmentService = assessmentService;
    }

    /// <summary>
    /// Pronunciation assessment: accepts an uploaded audio file and returns the AI
    /// pronunciation assessment result.
    /// </summary>
    /// <param name="audio">Audio file (WAV/WebM/MP3 format, at most 10 MB).</param>
    /// <param name="referenceText">Reference text: the target sentence the user is expected to say.</param>
    /// <param name="flashcardId">Flashcard ID.</param>
    /// <param name="language">Language code (default: en-US).</param>
    /// <returns>
    /// On success, the assessment result (multi-dimensional scores such as accuracy and fluency).
    /// A 400 response for invalid input or an <see cref="InvalidOperationException"/> raised
    /// during processing; a 500 response for any other failure.
    /// </returns>
    // NOTE(review): the request size limit is disabled here while the action itself enforces a
    // 10 MB cap only once it runs. Consider a [RequestSizeLimit] instead; confirm that clients
    // do not depend on the AUDIO_TOO_LARGE error body before changing it.
    [HttpPost("pronunciation-assessment")]
    [Consumes("multipart/form-data")]
    [ProducesResponseType(typeof(PronunciationResult), 200)]
    [ProducesResponseType(400)]
    [ProducesResponseType(500)]
    [DisableRequestSizeLimit] // Allow large file uploads.
    public async Task<IActionResult> EvaluatePronunciation(
        [FromForm] IFormFile audio,
        [FromForm] string referenceText,
        [FromForm] string flashcardId,
        [FromForm] string language = "en-US")
    {
        // Only a short prefix of the reference text is logged.
        var referencePreview = referenceText is null
            ? "NULL"
            : referenceText[..Math.Min(ReferenceTextLogLength, referenceText.Length)];

        _logger.LogInformation("✅ Controller Action 開始執行 - FlashcardId: {FlashcardId}, ReferenceText: {ReferenceText}",
            flashcardId ?? "NULL", referencePreview);

        // Reject the request early when model binding reported errors, logging each one.
        if (!ModelState.IsValid)
        {
            _logger.LogWarning("ModelState 驗證失敗:");
            foreach (var entry in ModelState)
            {
                if (entry.Value is null)
                {
                    continue;
                }

                foreach (var error in entry.Value.Errors)
                {
                    _logger.LogWarning(" {Key}: {Error}", entry.Key, error.ErrorMessage);
                }
            }

            return ErrorResponse("MODEL_VALIDATION_ERROR", "請求參數驗證失敗", ModelState, 400);
        }

        try
        {
            // 1. Validate the request.
            if (audio == null || audio.Length == 0)
            {
                return ErrorResponse("AUDIO_REQUIRED", "音頻檔案不能為空", null, 400);
            }

            if (audio.Length > MaxAudioBytes)
            {
                // Report the size with decimals: integer division would show e.g. a 10.9 MB
                // upload as "10MB", which contradicts the "too large" message.
                var actualMegabytes = (audio.Length / 1024.0 / 1024.0)
                    .ToString("F2", System.Globalization.CultureInfo.InvariantCulture);

                return ErrorResponse("AUDIO_TOO_LARGE", "音頻檔案過大,請限制在 10MB 以內",
                    new { maxSize = "10MB", actualSize = $"{actualMegabytes}MB" }, 400);
            }

            if (string.IsNullOrWhiteSpace(referenceText))
            {
                return ErrorResponse("REFERENCE_TEXT_REQUIRED", "參考文本不能為空", null, 400);
            }

            if (string.IsNullOrWhiteSpace(flashcardId))
            {
                return ErrorResponse("FLASHCARD_ID_REQUIRED", "詞卡 ID 不能為空", null, 400);
            }

            // 2. Inspect the audio format.
            var contentType = audio.ContentType?.ToLowerInvariant();

            // Compare only the media type itself: uploads may carry parameters
            // (for example "audio/webm;codecs=opus") that would never match the list.
            var mediaType = contentType?.Split(';', 2)[0].Trim();

            _logger.LogInformation("接收到音頻檔案: ContentType={ContentType}, Size={Size}bytes, FileName={FileName}",
                contentType, audio.Length, audio.FileName);

            // A missing or unrecognised Content-Type is logged but deliberately not rejected;
            // strict validation (INVALID_AUDIO_FORMAT) was disabled so that the speech
            // service decides whether it can handle the audio.
            if (string.IsNullOrEmpty(mediaType) || !KnownAudioMediaTypes.Contains(mediaType))
            {
                _logger.LogWarning("音頻格式可能不支援: ContentType={ContentType}, 將嘗試處理", contentType);
            }

            // 3. Validate the audio duration, using the file size as a rough estimate.
            //    The threshold is low so that short test files are still accepted.
            if (audio.Length < MinAudioBytes)
            {
                return ErrorResponse("AUDIO_TOO_SHORT", "錄音時間太短或檔案損壞",
                    new
                    {
                        minSize = "100 bytes",
                        actualSize = $"{audio.Length} bytes",
                        fileName = audio.FileName,
                        contentType = contentType
                    }, 400);
            }

            _logger.LogInformation("開始處理發音評估: FlashcardId={FlashcardId}, Size={Size}MB",
                flashcardId, audio.Length / 1024.0 / 1024.0);

            // 4. Open the audio stream and hand it to the assessment service.
            using var audioStream = audio.OpenReadStream();
            var result = await _assessmentService.EvaluatePronunciationAsync(
                audioStream, referenceText, flashcardId, language);

            _logger.LogInformation("發音評估完成: Score={Score}, ProcessingTime={Time}ms",
                result.Scores.Overall, result.ProcessingTime);

            return SuccessResponse(result, "發音評估完成");
        }
        catch (InvalidOperationException ex)
        {
            // Treated as a business-logic failure: the exception message is returned to the caller.
            _logger.LogWarning(ex, "發音評估業務邏輯錯誤: FlashcardId={FlashcardId}", flashcardId);
            return ErrorResponse("SPEECH_PROCESSING_ERROR", ex.Message, null, 400);
        }
        catch (Exception ex)
        {
            // Unexpected failure: log the details, return a generic message.
            _logger.LogError(ex, "發音評估系統錯誤: FlashcardId={FlashcardId}", flashcardId);
            return ErrorResponse("INTERNAL_ERROR", "發音評估失敗,請稍後再試", null, 500);
        }
    }

    /// <summary>
    /// Simplified pronunciation-assessment test endpoint, used to debug model binding problems.
    /// Reads the multipart form directly, logs every field and file, and echoes a summary back.
    /// </summary>
    /// <returns>
    /// The number of form fields and files, the field values, and each file's name, length and
    /// content type; a 500 response carrying the exception message if reading the form fails.
    /// </returns>
    // NOTE(review): this endpoint logs and returns every submitted form value and is reachable
    // anonymously; confirm it is not exposed in production.
    [HttpPost("test-pronunciation")]
    [Consumes("multipart/form-data")]
    [ProducesResponseType(200)]
    [ProducesResponseType(400)]
    [DisableRequestSizeLimit]
    public async Task<IActionResult> TestPronunciation()
    {
        _logger.LogInformation("🔧 測試 endpoint 開始執行");

        try
        {
            // Read Request.Form directly to bypass model binding.
            var form = await Request.ReadFormAsync();
            _logger.LogInformation("📝 Form 讀取成功,包含 {Count} 個欄位", form.Count);

            // Log every form field.
            foreach (var field in form)
            {
                _logger.LogInformation(" Field: {Key} = {Value}", field.Key, field.Value.ToString());
            }

            // Log every uploaded file.
            if (form.Files.Count > 0)
            {
                _logger.LogInformation("📁 找到 {Count} 個檔案", form.Files.Count);
                foreach (var file in form.Files)
                {
                    _logger.LogInformation(" 檔案: {Name}, 大小: {Size}bytes, 類型: {Type}",
                        file.Name, file.Length, file.ContentType);
                }
            }
            else
            {
                _logger.LogWarning("⚠️ 沒有找到檔案");
            }

            return SuccessResponse(new
            {
                FormFieldCount = form.Count,
                FileCount = form.Files.Count,
                Fields = form.ToDictionary(f => f.Key, f => f.Value.ToString()),
                Files = form.Files.Select(f => new { f.Name, f.Length, f.ContentType })
            }, "測試成功");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "❌ 測試 endpoint 錯誤");
            return ErrorResponse("TEST_ERROR", ex.Message, null, 500);
        }
    }

    /// <summary>
    /// Checks the speech service status.
    /// </summary>
    /// <returns>
    /// The availability status of Azure Speech Services together with the UTC check time;
    /// a 500 response if the availability check throws.
    /// </returns>
    [HttpGet("service-status")]
    [ProducesResponseType(typeof(object), 200)]
    [ProducesResponseType(500)]
    public async Task<IActionResult> GetServiceStatus()
    {
        try
        {
            var isAvailable = await _assessmentService.IsServiceAvailableAsync();

            var status = new
            {
                IsAvailable = isAvailable,
                ServiceName = "Azure Speech Services",
                CheckTime = DateTime.UtcNow,
                Message = isAvailable ? "服務正常運行" : "服務不可用"
            };

            return SuccessResponse(status);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "檢查語音服務狀態時發生錯誤");
            return ErrorResponse("SERVICE_CHECK_ERROR", "無法檢查服務狀態", null, 500);
        }
    }
}