// dramaling-vocab-learning/backend/DramaLing.Api/Services/HealthCheckService.cs
// 256 lines, 9.1 KiB, C#

using Microsoft.Extensions.Diagnostics.HealthChecks;
using DramaLing.Api.Data;
using DramaLing.Api.Services.AI;
using DramaLing.Api.Services.Caching;
using Microsoft.EntityFrameworkCore;
using System.Diagnostics;
namespace DramaLing.Api.Services;
/// <summary>
/// System health check service that monitors the state of the important components:
/// the database, the AI providers, the cache service and process memory. The individual
/// component results are attached to the returned <see cref="HealthCheckResult"/> data.
/// </summary>
public class SystemHealthCheckService : IHealthCheck
{
    /// <summary>Maximum time the database connectivity probe is allowed to run.</summary>
    private static readonly TimeSpan DatabaseCheckTimeout = TimeSpan.FromSeconds(5);

    /// <summary>Lifetime of the temporary entry written by the cache round-trip probe.</summary>
    private static readonly TimeSpan CacheProbeTtl = TimeSpan.FromMinutes(1);

    /// <summary>Managed memory budget (512MB) that usage is compared against.</summary>
    private const int MaxMemoryBytes = 512 * 1024 * 1024;

    /// <summary>Fraction of <see cref="MaxMemoryBytes"/> at or above which memory is reported unhealthy.</summary>
    private const double MemoryUnhealthyThreshold = 0.8;

    private readonly DramaLingDbContext _dbContext;
    private readonly IAIProviderManager _aiProviderManager;
    private readonly ICacheService _cacheService;
    private readonly ILogger<SystemHealthCheckService> _logger;

    /// <summary>
    /// Creates the health check with the components it probes.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when any dependency is <c>null</c>.</exception>
    public SystemHealthCheckService(
        DramaLingDbContext dbContext,
        IAIProviderManager aiProviderManager,
        ICacheService cacheService,
        ILogger<SystemHealthCheckService> logger)
    {
        _dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
        _aiProviderManager = aiProviderManager ?? throw new ArgumentNullException(nameof(aiProviderManager));
        _cacheService = cacheService ?? throw new ArgumentNullException(nameof(cacheService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs every component check in sequence and aggregates the outcome.
    /// </summary>
    /// <param name="context">Health check context supplied by the health check framework (not read here).</param>
    /// <param name="cancellationToken">Token that cancels the database connectivity probe.</param>
    /// <returns>
    /// <c>Healthy</c> when every component check passes; otherwise <c>Unhealthy</c> with a
    /// description listing each failing component. The per-component results are always
    /// included in the result data under the keys <c>Database</c>, <c>AIServices</c>,
    /// <c>Cache</c>, <c>Memory</c> and <c>SystemInfo</c>.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var healthData = new Dictionary<string, object>();
        var failureMessages = new List<string>();

        // Stores a component result and remembers its message when the component failed.
        void Record(string dataKey, string label, HealthCheckComponent component)
        {
            healthData[dataKey] = component;
            if (!component.IsHealthy)
            {
                failureMessages.Add($"{label}: {component.Message}");
            }
        }

        try
        {
            // 1. Database health check
            Record("Database", "Database", await CheckDatabaseHealthAsync(cancellationToken));

            // 2. AI services health check
            Record("AIServices", "AI Services", await CheckAIServicesHealthAsync());

            // 3. Cache service health check
            Record("Cache", "Cache", await CheckCacheHealthAsync());

            // 4. Memory usage check
            Record("Memory", "Memory", CheckMemoryUsage());

            // 5. System resource information (informational only, never fails the check)
            healthData["SystemInfo"] = GetSystemInfo();

            var result = failureMessages.Count == 0
                ? HealthCheckResult.Healthy("All systems operational", healthData)
                : HealthCheckResult.Unhealthy($"Health check failed: {string.Join(", ", failureMessages)}", null, healthData);

            _logger.LogInformation("Health check completed: {Status}", result.Status);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Health check failed with exception");
            return HealthCheckResult.Unhealthy("Health check exception", ex, healthData);
        }
    }

    /// <summary>
    /// Probes database connectivity, bounded by <see cref="DatabaseCheckTimeout"/> and the caller's token.
    /// </summary>
    /// <param name="cancellationToken">Caller token; linked with the internal timeout.</param>
    /// <returns>A component result; failures are reported in the result rather than thrown.</returns>
    private async Task<HealthCheckComponent> CheckDatabaseHealthAsync(CancellationToken cancellationToken = default)
    {
        try
        {
            // Link the caller's token with the timeout so either one stops the probe.
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            timeoutCts.CancelAfter(DatabaseCheckTimeout);

            var stopwatch = Stopwatch.StartNew();

            // CanConnectAsync signals an unreachable database by returning false,
            // so the result has to be inspected instead of relying on an exception.
            var canConnect = await _dbContext.Database.CanConnectAsync(timeoutCts.Token);
            stopwatch.Stop();

            return new HealthCheckComponent
            {
                IsHealthy = canConnect,
                Message = canConnect
                    ? "Database connection successful"
                    : "Database connection failed: unable to connect",
                ResponseTimeMs = (int)stopwatch.ElapsedMilliseconds,
                CheckedAt = DateTime.UtcNow
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Database connection failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Asks the AI provider manager for a health report and summarises it.
    /// The component is healthy when at least one provider is healthy.
    /// </summary>
    /// <returns>A component result; failures are reported in the result rather than thrown.</returns>
    private async Task<HealthCheckComponent> CheckAIServicesHealthAsync()
    {
        try
        {
            var healthReport = await _aiProviderManager.CheckAllProvidersHealthAsync();

            // Average() throws on an empty sequence, hence the Any() guard.
            var averageResponseTimeMs = healthReport.ProviderHealthInfos.Any()
                ? (int)healthReport.ProviderHealthInfos.Average(p => p.ResponseTimeMs)
                : 0;

            return new HealthCheckComponent
            {
                IsHealthy = healthReport.HealthyProviders > 0,
                Message = $"{healthReport.HealthyProviders}/{healthReport.TotalProviders} AI providers healthy",
                ResponseTimeMs = averageResponseTimeMs,
                CheckedAt = healthReport.CheckedAt,
                Details = healthReport.ProviderHealthInfos.ToDictionary(
                    p => p.ProviderName,
                    p => new { p.IsHealthy, p.ResponseTimeMs, p.ErrorMessage })
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"AI services check failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Performs a set / get / remove round-trip against the cache with a unique key
    /// and attaches the cache statistics to the result.
    /// </summary>
    /// <returns>A component result; failures are reported in the result rather than thrown.</returns>
    private async Task<HealthCheckComponent> CheckCacheHealthAsync()
    {
        try
        {
            var testKey = $"health_check_{Guid.NewGuid()}";
            var testValue = new { Test = "HealthCheck", Timestamp = DateTime.UtcNow };

            var stopwatch = Stopwatch.StartNew();

            // Write the probe entry, read it back, then clean it up.
            await _cacheService.SetAsync(testKey, testValue, CacheProbeTtl);
            var retrieved = await _cacheService.GetAsync<object>(testKey);
            await _cacheService.RemoveAsync(testKey);
            stopwatch.Stop();

            var stats = await _cacheService.GetStatsAsync();
            var roundTripSucceeded = retrieved != null;

            return new HealthCheckComponent
            {
                IsHealthy = roundTripSucceeded,
                // Do not claim the cache is operational when the value could not be read back.
                Message = roundTripSucceeded
                    ? "Cache service operational"
                    : "Cache service failed: test value could not be read back",
                ResponseTimeMs = (int)stopwatch.ElapsedMilliseconds,
                CheckedAt = DateTime.UtcNow,
                Details = new
                {
                    HitRate = stats.HitRate,
                    TotalKeys = stats.TotalKeys,
                    TotalRequests = stats.TotalRequests
                }
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Cache service failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Compares the value reported by <c>GC.GetTotalMemory(false)</c> against
    /// <see cref="MaxMemoryBytes"/>; unhealthy at or above <see cref="MemoryUnhealthyThreshold"/>.
    /// </summary>
    /// <returns>A component result; failures are reported in the result rather than thrown.</returns>
    private HealthCheckComponent CheckMemoryUsage()
    {
        try
        {
            var memoryUsage = GC.GetTotalMemory(false);
            var memoryPercentage = (double)memoryUsage / MaxMemoryBytes;

            return new HealthCheckComponent
            {
                IsHealthy = memoryPercentage < MemoryUnhealthyThreshold, // 80% usage is the warning line
                Message = $"Memory usage: {memoryUsage / 1024 / 1024}MB ({memoryPercentage:P1})",
                CheckedAt = DateTime.UtcNow,
                Details = new
                {
                    MemoryUsageBytes = memoryUsage,
                    MemoryUsageMB = memoryUsage / 1024 / 1024,
                    MemoryPercentage = memoryPercentage,
                    MaxMemoryMB = MaxMemoryBytes / 1024 / 1024
                }
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Memory check failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Collects informational details about the host environment and the current process.
    /// </summary>
    /// <returns>An anonymous object with environment, machine, OS, runtime and uptime details.</returns>
    private object GetSystemInfo()
    {
        // Process is IDisposable; release its handle once the start time has been read.
        using var currentProcess = Process.GetCurrentProcess();

        return new
        {
            Environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Unknown",
            MachineName = Environment.MachineName,
            OSVersion = Environment.OSVersion.ToString(),
            ProcessorCount = Environment.ProcessorCount,
            RuntimeVersion = Environment.Version.ToString(),
            Uptime = DateTime.UtcNow - currentProcess.StartTime.ToUniversalTime(),
            Timestamp = DateTime.UtcNow
        };
    }
}
/// <summary>
/// Result of a single component health check.
/// </summary>
public class HealthCheckComponent
{
/// <summary>Whether the component passed its check.</summary>
public bool IsHealthy { get; set; }
/// <summary>Human-readable status or failure description; defaults to an empty string.</summary>
public string Message { get; set; } = string.Empty;
/// <summary>Response time associated with the check, in milliseconds; stays 0 when a check does not set it.</summary>
public int ResponseTimeMs { get; set; }
/// <summary>Timestamp recording when the check was performed.</summary>
public DateTime CheckedAt { get; set; }
/// <summary>Optional component-specific payload (for example statistics); <c>null</c> when none is provided.</summary>
public object? Details { get; set; }
}