256 lines
9.1 KiB
C#
256 lines
9.1 KiB
C#
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
|
using DramaLing.Api.Data;
|
|
using DramaLing.Api.Services.AI;
|
|
using DramaLing.Api.Services.Caching;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using System.Diagnostics;
|
|
|
|
namespace DramaLing.Api.Services;
|
|
|
|
/// <summary>
/// System health check service that monitors the state of the application's key
/// components: database connectivity, AI providers, the cache service and managed
/// memory usage. It also attaches basic system information to the result.
/// </summary>
public class SystemHealthCheckService : IHealthCheck
{
    /// <summary>Maximum time the database connectivity probe is allowed to run.</summary>
    private static readonly TimeSpan DatabaseCheckTimeout = TimeSpan.FromSeconds(5);

    /// <summary>Lifetime of the throwaway entry written by the cache probe.</summary>
    private static readonly TimeSpan CacheProbeTtl = TimeSpan.FromMinutes(1);

    /// <summary>Managed-memory budget (512 MB) that usage is compared against.</summary>
    private const int MemoryLimitBytes = 512 * 1024 * 1024;

    /// <summary>Fraction of <see cref="MemoryLimitBytes"/> at or above which memory is reported unhealthy.</summary>
    private const double MemoryWarningRatio = 0.8;

    private readonly DramaLingDbContext _dbContext;
    private readonly IAIProviderManager _aiProviderManager;
    private readonly ICacheService _cacheService;
    private readonly ILogger<SystemHealthCheckService> _logger;

    /// <summary>
    /// Creates the health check with the services it probes.
    /// </summary>
    /// <param name="dbContext">Database context used for the connectivity probe.</param>
    /// <param name="aiProviderManager">Manager queried for the health of all AI providers.</param>
    /// <param name="cacheService">Cache service exercised with a set/get/remove round trip.</param>
    /// <param name="logger">Logger used to record the outcome of each health check.</param>
    /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
    public SystemHealthCheckService(
        DramaLingDbContext dbContext,
        IAIProviderManager aiProviderManager,
        ICacheService cacheService,
        ILogger<SystemHealthCheckService> logger)
    {
        _dbContext = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
        _aiProviderManager = aiProviderManager ?? throw new ArgumentNullException(nameof(aiProviderManager));
        _cacheService = cacheService ?? throw new ArgumentNullException(nameof(cacheService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs every component check in sequence and aggregates them into a single result.
    /// </summary>
    /// <param name="context">Health check context supplied by the framework (not used by this check).</param>
    /// <param name="cancellationToken">Token that cancels the database connectivity probe.</param>
    /// <returns>
    /// <c>Healthy</c> when every component reports healthy; otherwise <c>Unhealthy</c> with the
    /// failing components listed in the description. The per-component results are attached as data
    /// under the keys <c>Database</c>, <c>AIServices</c>, <c>Cache</c>, <c>Memory</c> and <c>SystemInfo</c>.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var healthData = new Dictionary<string, object>();
        var failureMessages = new List<string>();

        try
        {
            // 1. Database health check
            Record("Database", "Database", await CheckDatabaseHealthAsync(cancellationToken));

            // 2. AI services health check
            Record("AIServices", "AI Services", await CheckAIServicesHealthAsync());

            // 3. Cache service health check
            Record("Cache", "Cache", await CheckCacheHealthAsync());

            // 4. Memory usage check
            Record("Memory", "Memory", CheckMemoryUsage());

            // 5. System resource information (informational only, never marks the check unhealthy)
            healthData["SystemInfo"] = GetSystemInfo();

            if (failureMessages.Count == 0)
            {
                var healthy = HealthCheckResult.Healthy("All systems operational", healthData);
                _logger.LogInformation("Health check completed: {Status}", healthy.Status);
                return healthy;
            }

            var failures = string.Join(", ", failureMessages);
            var unhealthy = HealthCheckResult.Unhealthy($"Health check failed: {failures}", null, healthData);

            // Failures are logged at warning level together with their reasons so they stand out.
            _logger.LogWarning("Health check completed: {Status} ({Failures})", unhealthy.Status, failures);
            return unhealthy;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Health check failed with exception");
            return HealthCheckResult.Unhealthy("Health check exception", ex, healthData);
        }

        // Stores a component result under its data key and notes the failure message, if any.
        void Record(string dataKey, string label, HealthCheckComponent component)
        {
            healthData[dataKey] = component;
            if (!component.IsHealthy)
            {
                failureMessages.Add($"{label}: {component.Message}");
            }
        }
    }

    /// <summary>
    /// Probes database connectivity, bounded by <see cref="DatabaseCheckTimeout"/>.
    /// </summary>
    /// <param name="cancellationToken">Caller token, linked with the internal timeout.</param>
    /// <returns>The component result; unhealthy when the probe returns <c>false</c> or throws.</returns>
    private async Task<HealthCheckComponent> CheckDatabaseHealthAsync(CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();

        try
        {
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(DatabaseCheckTimeout);

            // Simple connectivity test. The boolean result must be inspected: a failed
            // connection attempt can be reported as 'false' instead of an exception.
            var canConnect = await _dbContext.Database.CanConnectAsync(cts.Token);

            return new HealthCheckComponent
            {
                IsHealthy = canConnect,
                Message = canConnect
                    ? "Database connection successful"
                    : "Database connection failed: database is not reachable",
                ResponseTimeMs = (int)stopwatch.ElapsedMilliseconds,
                CheckedAt = DateTime.UtcNow
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Database connection failed: {ex.Message}",
                ResponseTimeMs = (int)stopwatch.ElapsedMilliseconds,
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Queries the AI provider manager for the health of all providers.
    /// </summary>
    /// <returns>
    /// The component result; healthy when at least one provider is healthy. The response
    /// time is the average across providers and the details hold the per-provider status.
    /// </returns>
    private async Task<HealthCheckComponent> CheckAIServicesHealthAsync()
    {
        try
        {
            var healthReport = await _aiProviderManager.CheckAllProvidersHealthAsync();

            // Materialize once so the sequence is not enumerated repeatedly below.
            var providers = healthReport.ProviderHealthInfos.ToList();

            return new HealthCheckComponent
            {
                IsHealthy = healthReport.HealthyProviders > 0,
                Message = $"{healthReport.HealthyProviders}/{healthReport.TotalProviders} AI providers healthy",
                ResponseTimeMs = providers.Count > 0
                    ? (int)providers.Average(p => p.ResponseTimeMs)
                    : 0,
                CheckedAt = healthReport.CheckedAt,
                Details = providers.ToDictionary(
                    p => p.ProviderName,
                    p => new { p.IsHealthy, p.ResponseTimeMs, p.ErrorMessage })
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"AI services check failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Exercises the cache with a set/get/remove round trip on a unique key and collects cache statistics.
    /// </summary>
    /// <returns>The component result; unhealthy when the value cannot be read back or any call throws.</returns>
    private async Task<HealthCheckComponent> CheckCacheHealthAsync()
    {
        try
        {
            var testKey = $"health_check_{Guid.NewGuid()}";
            var testValue = new { Test = "HealthCheck", Timestamp = DateTime.UtcNow };

            var stopwatch = Stopwatch.StartNew();

            // Test write and read, then clean up the probe entry.
            await _cacheService.SetAsync(testKey, testValue, CacheProbeTtl);
            var retrieved = await _cacheService.GetAsync<object>(testKey);
            await _cacheService.RemoveAsync(testKey);

            // Only the round trip is timed; the statistics lookup is excluded.
            stopwatch.Stop();

            var stats = await _cacheService.GetStatsAsync();
            var roundTripSucceeded = retrieved is not null;

            return new HealthCheckComponent
            {
                IsHealthy = roundTripSucceeded,
                Message = roundTripSucceeded
                    ? "Cache service operational"
                    : "Cache service failed: test entry could not be read back",
                ResponseTimeMs = (int)stopwatch.ElapsedMilliseconds,
                CheckedAt = DateTime.UtcNow,
                Details = new
                {
                    HitRate = stats.HitRate,
                    TotalKeys = stats.TotalKeys,
                    TotalRequests = stats.TotalRequests
                }
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Cache service failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Compares the managed heap size reported by the GC with <see cref="MemoryLimitBytes"/>.
    /// </summary>
    /// <returns>The component result; healthy while usage is below <see cref="MemoryWarningRatio"/> of the limit.</returns>
    private HealthCheckComponent CheckMemoryUsage()
    {
        try
        {
            var memoryUsage = GC.GetTotalMemory(false);
            var memoryPercentage = (double)memoryUsage / MemoryLimitBytes;
            var memoryUsageMb = memoryUsage / 1024 / 1024;

            return new HealthCheckComponent
            {
                IsHealthy = memoryPercentage < MemoryWarningRatio, // 80% usage is the warning line
                Message = $"Memory usage: {memoryUsageMb}MB ({memoryPercentage:P1})",
                CheckedAt = DateTime.UtcNow,
                Details = new
                {
                    MemoryUsageBytes = memoryUsage,
                    MemoryUsageMB = memoryUsageMb,
                    MemoryPercentage = memoryPercentage,
                    MaxMemoryMB = MemoryLimitBytes / 1024 / 1024
                }
            };
        }
        catch (Exception ex)
        {
            return new HealthCheckComponent
            {
                IsHealthy = false,
                Message = $"Memory check failed: {ex.Message}",
                CheckedAt = DateTime.UtcNow
            };
        }
    }

    /// <summary>
    /// Collects informational details about the host environment and the current process.
    /// </summary>
    /// <returns>An anonymous object with environment name, machine, OS, CPU count, runtime version, uptime and timestamp.</returns>
    private object GetSystemInfo()
    {
        // Process is IDisposable; dispose it so repeated health polls do not accumulate handles.
        using var currentProcess = Process.GetCurrentProcess();
        var now = DateTime.UtcNow;

        return new
        {
            Environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Unknown",
            MachineName = Environment.MachineName,
            OSVersion = Environment.OSVersion.ToString(),
            ProcessorCount = Environment.ProcessorCount,
            RuntimeVersion = Environment.Version.ToString(),
            Uptime = now - currentProcess.StartTime.ToUniversalTime(),
            Timestamp = now
        };
    }
}
|
|
|
|
/// <summary>
/// Result of a health check for a single component.
/// </summary>
public class HealthCheckComponent
{
    /// <summary>Whether the component passed its check.</summary>
    public bool IsHealthy { get; set; }

    /// <summary>Human-readable description of the check outcome; empty by default.</summary>
    public string Message { get; set; } = "";

    /// <summary>Response time recorded for the check, in milliseconds.</summary>
    public int ResponseTimeMs { get; set; }

    /// <summary>Timestamp recorded for the check.</summary>
    public DateTime CheckedAt { get; set; }

    /// <summary>Optional component-specific details; <c>null</c> when none are provided.</summary>
    public object? Details { get; set; }
}