dramaling-vocab-learning/backend/DramaLing.Api/Services/AI/Gemini/ImageDescriptionGenerator.cs

118 lines
4.7 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using DramaLing.Api.Models.DTOs;
using DramaLing.Api.Models.Entities;
namespace DramaLing.Api.Services.AI.Gemini;
/// <summary>
/// Image description generation service: asks Gemini to turn a flashcard's example
/// sentence into an image prompt, then normalizes that prompt so it always carries
/// the flat-illustration style guide and the "no visible text" rule.
/// </summary>
public class ImageDescriptionGenerator : IImageDescriptionGenerator
{
    /// <summary>Markdown code-fence marker that the model may wrap its answer in.</summary>
    private const string CodeFence = "```";

    /// <summary>Phrase whose presence means the description already states the flat-illustration style.</summary>
    private const string StyleMarker = "flat illustration";

    /// <summary>Phrase whose presence means the description already carries the no-text rule.</summary>
    private const string NoTextMarker = "Absolutely no visible text";

    /// <summary>Style guide appended when the generated description does not mention the style itself.</summary>
    private const string StyleGuideSuffix = ". Style guide: flat illustration style, outline-less shapes, warm and soft color tones, low saturation, cartoon-style characters with natural expressions, simplified background with color blocks, cozy and educational atmosphere, no texture, no gradients, no photorealism, no fantasy elements.";

    /// <summary>Rule forbidding any writing in the image; used both in the Gemini prompt and as a fallback suffix.</summary>
    private const string NoTextRule = "Absolutely no visible text, characters, letters, numbers, symbols, handwriting, labels, or any form of writing anywhere in the image — including on signs, books, clothing, screens, or backgrounds.";

    private readonly IGeminiClient _geminiClient;
    private readonly ILogger<ImageDescriptionGenerator> _logger;

    /// <summary>
    /// Creates the generator.
    /// </summary>
    /// <param name="geminiClient">Client used to send the prompt to Gemini.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public ImageDescriptionGenerator(
        IGeminiClient geminiClient,
        ILogger<ImageDescriptionGenerator> logger)
    {
        _geminiClient = geminiClient ?? throw new ArgumentNullException(nameof(geminiClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Generates an image prompt for the flashcard's example sentence.
    /// </summary>
    /// <param name="flashcard">Flashcard whose <c>Example</c> sentence is illustrated.</param>
    /// <param name="options">Generation options; accepted for interface compatibility and currently not consulted.</param>
    /// <returns>The description returned by Gemini, with the style guide and no-text rule guaranteed to be present.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="flashcard"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">Gemini returned an empty response or one containing only code-fence markers.</exception>
    public async Task<string> GenerateImageDescriptionAsync(Flashcard flashcard, GenerationOptionsDto options)
    {
        // Validate before entering the try block: the catch handler reads flashcard.Id,
        // so a null flashcard would otherwise fault a second time while logging.
        if (flashcard is null)
        {
            throw new ArgumentNullException(nameof(flashcard));
        }

        try
        {
            _logger.LogInformation("Starting image description generation for flashcard {FlashcardId}", flashcard.Id);

            var prompt = BuildImageDescriptionPrompt(flashcard);
            var response = await _geminiClient.CallGeminiAPIAsync(prompt);

            if (string.IsNullOrWhiteSpace(response))
            {
                throw new InvalidOperationException("Gemini API returned empty response");
            }

            var description = ExtractImageDescription(response);
            if (description.Length == 0)
            {
                // Only fence markers came back; returning style boilerplate with no scene
                // would produce an image unrelated to the example sentence.
                throw new InvalidOperationException("Gemini API response contained no image description");
            }

            var optimizedPrompt = OptimizeForReplicate(description);

            _logger.LogInformation("Image description generated successfully for flashcard {FlashcardId}", flashcard.Id);
            return optimizedPrompt;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Image description generation failed for flashcard {FlashcardId}", flashcard.Id);
            throw;
        }
    }

    /// <summary>
    /// Builds the instruction prompt sent to Gemini, embedding the flashcard's example sentence.
    /// </summary>
    /// <param name="flashcard">Flashcard supplying the example sentence.</param>
    /// <returns>The full prompt text.</returns>
    private static string BuildImageDescriptionPrompt(Flashcard flashcard)
    {
        // The literal's content lines are part of the prompt and must stay unindented.
        return $@"# 總覽
你是一位專業插畫設計師兼職英文老師,專門為英語學習教材製作插畫圖卡,用來幫助學生理解英文例句的意思。
# 例句資訊
例句:{flashcard.Example}
# SOP
1. 根據上述英文例句請撰寫一段圖像描述提示詞用於提供圖片生成AI作為生成圖片的提示詞
2. 請將下方「風格指南」的所有要求加入提示詞中
3. 並於圖片提示詞最後加上「{NoTextRule}」
# 圖片提示詞規範
## 情境清楚
1. 角色描述具體清楚
2. 動作明確具象
3. 場景明確具體
4. 物品明確具體
5. 語意需與原句一致
6. 避免過於抽象或象徵性符號
## 風格指南
- 風格類型扁平插畫Flat Illustration
- 線條特徵無描邊線條outline-less
- 色調:暖色調、柔和、低飽和
- 人物樣式:簡化卡通人物,表情自然,不誇張
- 背景構成:圖形簡化,使用色塊區分層次
- 整體氛圍:溫馨、平靜、適合教育情境
- 技術風格:無紋理、無漸層、無光影寫實感
請根據以上規範,為這個英文例句生成圖片描述提示詞,並確保完全符合風格指南要求。";
    }

    /// <summary>
    /// Extracts the image description from the Gemini response, removing a surrounding
    /// markdown code fence when present.
    /// </summary>
    /// <param name="geminiResponse">Raw, non-empty response text.</param>
    /// <returns>The trimmed description; empty when the response held only fence markers.</returns>
    private static string ExtractImageDescription(string geminiResponse)
    {
        var description = geminiResponse.Trim();

        if (!description.StartsWith(CodeFence, StringComparison.Ordinal))
        {
            return description;
        }

        var firstLineEnd = description.IndexOf('\n');
        if (firstLineEnd < 0)
        {
            // Whole answer is on one line (e.g. "```text```"): strip only the backticks.
            return description.Trim('`').Trim();
        }

        // Drop the opening fence line, which may also carry a language tag.
        var body = description.Substring(firstLineEnd + 1).TrimEnd();

        // Remove the closing fence only when it is actually there, so an unterminated
        // fence does not cost us the last line of real content.
        if (body.EndsWith(CodeFence, StringComparison.Ordinal))
        {
            body = body.Substring(0, body.Length - CodeFence.Length);
        }

        return body.Trim();
    }

    /// <summary>
    /// Ensures the prompt handed to the image model contains the flat-illustration
    /// style guide and the no-text rule, appending whichever is missing.
    /// </summary>
    /// <param name="description">Description extracted from the Gemini response.</param>
    /// <returns>The description, extended with any missing requirement.</returns>
    private static string OptimizeForReplicate(string description)
    {
        var optimizedPrompt = description;

        // Make sure the flat-illustration style requirement is present. The check ignores
        // case because the instruction prompt itself spells the style "Flat Illustration".
        if (!optimizedPrompt.Contains(StyleMarker, StringComparison.OrdinalIgnoreCase))
        {
            optimizedPrompt += StyleGuideSuffix;
        }

        // Always enforce the rule that forbids text in the image.
        if (!optimizedPrompt.Contains(NoTextMarker, StringComparison.OrdinalIgnoreCase))
        {
            optimizedPrompt += " " + NoTextRule;
        }

        return optimizedPrompt;
    }
}