21 KiB
21 KiB
選項詞彙庫功能規格書
- 版本: 1.0
- 日期: 2025-09-29
- 專案: DramaLing 智能英語學習系統
- 功能模組: 測驗選項生成系統
📋 功能概述
背景
目前 DramaLing 系統的測驗選項生成存在以下問題:
- 前端使用簡單佔位符:["其他選項1", "其他選項2", "其他選項3"]
- 後端隨機選擇:從用戶自己的詞卡中隨機選取,缺乏智能性
- 選項品質不穩定:可能產生過於簡單或困難的干擾項
- 缺乏科學性:未考慮語言學習的認知負荷理論
目標
建立一個智能選項詞彙庫系統,根據目標詞彙的特徵自動生成高品質的測驗干擾項。
核心特性
- 三參數匹配:CEFR 等級、字數、詞性
- 智能篩選:排除目標詞本身與已停用的詞彙;同義詞、相似拼寫的排除已於設計簡化中移出本版範圍(見「設計簡化說明」),列為未來擴充
- 可擴展性:支援持續新增詞彙和優化演算法
- 效能優化:透過索引和快取確保快速回應
🎯 功能需求
核心需求
| 需求ID | 描述 | 優先級 |
|---|---|---|
| REQ-001 | 根據 CEFR 等級匹配相近難度的詞彙 | 高 |
| REQ-002 | 根據字數(字元長度)匹配類似長度的詞彙 | 高 |
| REQ-003 | 根據詞性匹配相同詞性的詞彙 | 高 |
| REQ-004 | 每次生成 3 個不同的干擾項 | 高 |
| REQ-005 | 支援多種測驗類型(詞彙選擇、聽力等) | 中 |
| REQ-006 | 提供詞彙庫管理介面 | 低 |
設計簡化說明:為降低維護成本和實作複雜度,移除了同義詞排除、品質評分、頻率評級等進階功能。專注於三參數匹配的核心功能,確保系統簡潔實用。
非功能需求
| 需求ID | 描述 | 指標 |
|---|---|---|
| NFR-001 | 回應時間 | < 100ms |
| NFR-002 | 詞彙庫大小 | 初期 ≥ 10,000 詞 |
| NFR-003 | 可用性 | 99.9% |
| NFR-004 | 擴展性 | 支援 100,000+ 詞彙 |
🏗️ 系統設計
整體架構
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│  前端測驗頁面   │────│  選項生成API    │────│  詞彙庫服務     │
└─────────────────┘    └─────────────────┘    └─────────────────┘
                                │                      │
                                ▼                      ▼
                       ┌─────────────────┐    ┌─────────────────┐
                       │  快取層         │    │  選項詞彙庫     │
                       │  (Redis/Memory) │    │  (Database)     │
                       └─────────────────┘    └─────────────────┘
核心元件
- OptionsVocabulary 實體 - 詞彙庫資料模型
- OptionsVocabularyService - 詞彙庫業務邏輯
- DistractorGenerationService - 干擾項生成邏輯
- VocabularyMatchingEngine - 詞彙匹配演算法
📊 資料模型設計
OptionsVocabulary 實體
namespace DramaLing.Api.Models.Entities;
/// <summary>
/// A word in the options vocabulary pool from which quiz distractors are drawn.
/// </summary>
/// <remarks>
/// Indexes are declared on the class because EF Core's <c>[Index]</c> attribute targets the entity
/// type and names the indexed properties; it cannot be placed on a property.
/// NOTE(review): the migration in this spec creates only the Word, composite and IsActive indexes —
/// confirm whether the three single-column indexes declared here are still wanted.
/// </remarks>
[Index(nameof(Word), IsUnique = true, Name = "IX_OptionsVocabulary_Word")]
[Index(nameof(CEFRLevel), Name = "IX_OptionsVocabulary_CEFR")]
[Index(nameof(PartOfSpeech), Name = "IX_OptionsVocabulary_PartOfSpeech")]
[Index(nameof(WordLength), Name = "IX_OptionsVocabulary_WordLength")]
public class OptionsVocabulary
{
    /// <summary>
    /// Primary key.
    /// </summary>
    public Guid Id { get; set; }

    /// <summary>
    /// The word itself. Unique across the vocabulary pool.
    /// </summary>
    [Required]
    [MaxLength(100)]
    public string Word { get; set; } = string.Empty;

    /// <summary>
    /// CEFR difficulty level (A1, A2, B1, B2, C1, C2).
    /// </summary>
    [Required]
    [MaxLength(2)]
    public string CEFRLevel { get; set; } = string.Empty;

    /// <summary>
    /// Part of speech (noun, verb, adjective, adverb, etc.).
    /// </summary>
    [Required]
    [MaxLength(20)]
    public string PartOfSpeech { get; set; } = string.Empty;

    /// <summary>
    /// Character length of <see cref="Word"/>. The entity does not compute this itself;
    /// whoever creates the entity must assign it (normally <c>Word.Length</c>).
    /// </summary>
    public int WordLength { get; set; }

    /// <summary>
    /// Whether the word is enabled. Defaults to <c>true</c>.
    /// </summary>
    public bool IsActive { get; set; } = true;

    /// <summary>
    /// Creation timestamp (UTC); defaults to the time the object is constructed.
    /// </summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;

    /// <summary>
    /// Last-update timestamp (UTC); defaults to the time the object is constructed.
    /// </summary>
    public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
}
複合索引設計
// Configured inside the DbContext.
protected override void OnModelCreating(ModelBuilder modelBuilder)
{
    var vocabulary = modelBuilder.Entity<OptionsVocabulary>();

    // Core query index: CEFR level + part of speech + word length.
    vocabulary
        .HasIndex(v => new { v.CEFRLevel, v.PartOfSpeech, v.WordLength })
        .HasDatabaseName("IX_OptionsVocabulary_Core_Matching");

    // Index on the enabled flag.
    vocabulary
        .HasIndex(v => v.IsActive)
        .HasDatabaseName("IX_OptionsVocabulary_Active");
}
🔧 服務層設計
IOptionsVocabularyService 介面
namespace DramaLing.Api.Services;
/// <summary>
/// Operations on the options vocabulary pool: distractor generation, insertion, bulk import and search.
/// </summary>
public interface IOptionsVocabularyService
{
    /// <summary>
    /// Generates distractors for a target word.
    /// </summary>
    /// <param name="targetWord">The word the distractors are generated for.</param>
    /// <param name="cefrLevel">CEFR level of the target word.</param>
    /// <param name="partOfSpeech">Part of speech of the target word.</param>
    /// <param name="count">Number of distractors requested; defaults to 3.</param>
    /// <returns>The generated distractor words.</returns>
    Task<List<string>> GenerateDistractorsAsync(
        string targetWord,
        string cefrLevel,
        string partOfSpeech,
        int count = 3);

    /// <summary>
    /// Adds a single word to the options vocabulary.
    /// </summary>
    /// <param name="vocabulary">The entry to add.</param>
    /// <returns>A success flag.</returns>
    Task<bool> AddVocabularyAsync(OptionsVocabulary vocabulary);

    /// <summary>
    /// Imports a batch of words.
    /// </summary>
    /// <param name="vocabularies">The entries to import.</param>
    /// <returns>A count; presumably the number of entries actually imported — confirm against the implementation.</returns>
    Task<int> BulkImportAsync(List<OptionsVocabulary> vocabularies);

    /// <summary>
    /// Searches the vocabulary by optional criteria; a <c>null</c> criterion is presumably not applied
    /// (confirm against the implementation).
    /// </summary>
    /// <param name="cefrLevel">Optional CEFR level filter.</param>
    /// <param name="partOfSpeech">Optional part-of-speech filter.</param>
    /// <param name="minLength">Optional minimum word length.</param>
    /// <param name="maxLength">Optional maximum word length.</param>
    /// <param name="limit">Result limit; defaults to 100.</param>
    /// <returns>The matching vocabulary entries.</returns>
    Task<List<OptionsVocabulary>> SearchVocabulariesAsync(
        string? cefrLevel = null,
        string? partOfSpeech = null,
        int? minLength = null,
        int? maxLength = null,
        int limit = 100);
}
DistractorGenerationService 核心邏輯
/// <summary>
/// Selects quiz distractors from the options vocabulary by matching three parameters of the
/// target word: CEFR level (same or adjacent), part of speech (exact) and word length (± 2 characters).
/// </summary>
public class DistractorGenerationService
{
    // Allowed difference, in characters, between a distractor's length and the target's length.
    private const int LengthTolerance = 2;

    // Minimum number of random candidates fetched before the in-memory filtering step.
    private const int MinCandidatePool = 10;

    // CEFR levels in ascending difficulty; adjacency in this array defines "neighbouring level".
    private static readonly string[] CefrLevels = { "A1", "A2", "B1", "B2", "C1", "C2" };

    private readonly DramaLingDbContext _context;
    private readonly IMemoryCache _cache;
    private readonly ILogger<DistractorGenerationService> _logger;

    /// <summary>
    /// Creates the service. Without this constructor the readonly fields were never assigned,
    /// so the first database access would throw a <see cref="NullReferenceException"/>.
    /// </summary>
    public DistractorGenerationService(
        DramaLingDbContext context,
        IMemoryCache cache,
        ILogger<DistractorGenerationService> logger)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Generates up to <paramref name="count"/> distractors for <paramref name="targetWord"/>.
    /// </summary>
    /// <param name="targetWord">The correct answer; never returned as a distractor.</param>
    /// <param name="cefrLevel">CEFR level of the target word; matched case-insensitively.</param>
    /// <param name="partOfSpeech">Part of speech that distractors must share.</param>
    /// <param name="count">Number of distractors wanted; defaults to 3, matching IOptionsVocabularyService.</param>
    /// <returns>
    /// Between 0 and <paramref name="count"/> distinct words. Fewer are returned when the vocabulary
    /// does not hold enough matching entries.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="targetWord"/> is null or blank.</exception>
    public async Task<List<string>> GenerateDistractorsAsync(
        string targetWord,
        string cefrLevel,
        string partOfSpeech,
        int count = 3)
    {
        if (string.IsNullOrWhiteSpace(targetWord))
        {
            throw new ArgumentException("Target word must not be null or blank.", nameof(targetWord));
        }

        if (count <= 0)
        {
            return new List<string>();
        }

        // Surrounding whitespace must not inflate the length used for matching.
        var normalizedTarget = targetWord.Trim();
        var targetLength = normalizedTarget.Length;

        // 1. CEFR level matching (same level + adjacent levels).
        var allowedCEFRLevels = GetAllowedCEFRLevels(cefrLevel);

        // 2. Word length window (± LengthTolerance characters, never below 1).
        var minLength = Math.Max(1, targetLength - LengthTolerance);
        var maxLength = targetLength + LengthTolerance;

        // 3. Over-fetch so the in-memory filter below can still fill the requested count.
        //    Computed in long so a very large count cannot overflow.
        var poolSize = (int)Math.Min(int.MaxValue, Math.Max((long)MinCandidatePool, (long)count * 3));

        // 4. Active words other than the target, same part of speech, in random order.
        var candidates = await _context.OptionsVocabularies
            .Where(v => v.IsActive && v.Word != normalizedTarget)
            .Where(v => allowedCEFRLevels.Contains(v.CEFRLevel))
            .Where(v => v.PartOfSpeech == partOfSpeech)
            .Where(v => v.WordLength >= minLength && v.WordLength <= maxLength)
            .OrderBy(v => Guid.NewGuid())
            .Take(poolSize)
            .Select(v => v.Word)
            .ToListAsync();

        // 5. Final filtering. The database comparison above may be case-sensitive depending on
        //    collation, so drop case variants of the target and of each other here.
        var distractors = candidates
            .Where(word => !string.Equals(word, normalizedTarget, StringComparison.OrdinalIgnoreCase))
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(count)
            .ToList();

        if (distractors.Count < count)
        {
            _logger.LogWarning(
                "Only {Found} of {Requested} distractors available for {TargetWord} ({CefrLevel}, {PartOfSpeech})",
                distractors.Count, count, normalizedTarget, cefrLevel, partOfSpeech);
        }

        return distractors;
    }

    /// <summary>
    /// Returns the target level plus its immediate neighbours on the CEFR scale.
    /// An unrecognised level is returned on its own (normalised to upper case).
    /// </summary>
    private static List<string> GetAllowedCEFRLevels(string targetLevel)
    {
        // Levels in CefrLevels are upper case, so normalise before looking up.
        var normalized = targetLevel?.Trim().ToUpperInvariant() ?? string.Empty;
        var allowed = new List<string> { normalized };

        var targetIndex = Array.IndexOf(CefrLevels, normalized);
        if (targetIndex == -1)
        {
            return allowed;
        }

        // Add adjacent levels.
        if (targetIndex > 0)
        {
            allowed.Add(CefrLevels[targetIndex - 1]);
        }

        if (targetIndex < CefrLevels.Length - 1)
        {
            allowed.Add(CefrLevels[targetIndex + 1]);
        }

        return allowed;
    }
}
🌐 API 設計
新增到 StudyController
/// <summary>
/// Generates quiz options for a flashcard using the options vocabulary.
/// </summary>
/// <param name="flashcardId">Id of the flashcard whose word is the correct answer.</param>
/// <param name="questionType">Question type echoed back in the response; defaults to "vocab-choice".</param>
/// <returns>
/// 200 with the shuffled options, 404 when the flashcard does not exist, 500 on an unexpected error.
/// </returns>
[HttpGet("question-options/{flashcardId}")]
public async Task<ActionResult<QuestionOptionsResponse>> GenerateQuestionOptions(
    Guid flashcardId,
    [FromQuery] string questionType = "vocab-choice")
{
    // One correct answer plus the three distractors the generator returns by default.
    const int expectedOptionCount = 4;

    try
    {
        // NOTE(review): no ownership check is visible here — confirm that any caller may read any flashcard.
        var flashcard = await _context.Flashcards.FindAsync(flashcardId);
        if (flashcard == null)
        {
            return NotFound(new { Error = "Flashcard not found" });
        }

        // Fall back to B1 / noun when the flashcard carries no level or part of speech.
        var distractors = await _distractorGenerationService.GenerateDistractorsAsync(
            flashcard.Word,
            flashcard.DifficultyLevel ?? "B1",
            flashcard.PartOfSpeech ?? "noun");

        // Add the correct answer, dropping any distractor that equals it (or another distractor)
        // ignoring case, so the correct answer can never appear twice among the options.
        var allOptions = new List<string> { flashcard.Word };
        allOptions.AddRange(distractors
            .Where(d => !string.Equals(d, flashcard.Word, StringComparison.OrdinalIgnoreCase))
            .Distinct(StringComparer.OrdinalIgnoreCase));

        if (allOptions.Count < expectedOptionCount)
        {
            _logger.LogWarning(
                "Only {OptionCount} options available for flashcard {FlashcardId}",
                allOptions.Count, flashcardId);
        }

        // Shuffle so the correct answer is not always first.
        var shuffledOptions = allOptions.OrderBy(x => Guid.NewGuid()).ToArray();

        return Ok(new QuestionOptionsResponse
        {
            QuestionType = questionType,
            Options = shuffledOptions,
            CorrectAnswer = flashcard.Word,
            TargetWord = flashcard.Word,
            CEFRLevel = flashcard.DifficultyLevel,
            PartOfSpeech = flashcard.PartOfSpeech
        });
    }
    catch (Exception ex)
    {
        // Details go to the log only; the client receives a generic message.
        _logger.LogError(ex, "Error generating question options for flashcard {FlashcardId}", flashcardId);
        return StatusCode(500, new { Error = "Internal server error" });
    }
}
詞彙庫管理 API
/// <summary>
/// Management controller for the options vocabulary. Restricted to the Admin role.
/// </summary>
[ApiController]
[Route("api/[controller]")]
[Authorize(Roles = "Admin")]
public class OptionsVocabularyController : ControllerBase
{
    // Upper bound on search results so a single request cannot pull the whole table.
    // NOTE(review): 1000 is a chosen default, not a documented requirement — adjust as needed.
    private const int MaxSearchLimit = 1000;

    private readonly IOptionsVocabularyService _vocabularyService;

    /// <summary>
    /// Creates the controller. Without this constructor the service field was never assigned,
    /// so every action would throw a <see cref="NullReferenceException"/>.
    /// </summary>
    public OptionsVocabularyController(IOptionsVocabularyService vocabularyService)
    {
        _vocabularyService = vocabularyService ?? throw new ArgumentNullException(nameof(vocabularyService));
    }

    /// <summary>
    /// Adds a single word to the options vocabulary.
    /// </summary>
    /// <returns>200 when the service reports success, otherwise 400.</returns>
    [HttpPost]
    public async Task<ActionResult> AddVocabulary([FromBody] AddVocabularyRequest request)
    {
        if (request is null)
        {
            return BadRequest();
        }

        var success = await _vocabularyService.AddVocabularyAsync(ToEntity(request));
        return success ? Ok() : BadRequest();
    }

    /// <summary>
    /// Imports a batch of words.
    /// </summary>
    /// <returns>200 with the count reported by the service; 400 when the payload or an element is null.</returns>
    [HttpPost("bulk-import")]
    public async Task<ActionResult> BulkImport([FromBody] List<AddVocabularyRequest> requests)
    {
        if (requests is null || requests.Any(r => r is null))
        {
            return BadRequest();
        }

        var vocabularies = requests.Select(ToEntity).ToList();
        var importedCount = await _vocabularyService.BulkImportAsync(vocabularies);
        return Ok(new { ImportedCount = importedCount });
    }

    /// <summary>
    /// Searches the options vocabulary by optional filters.
    /// </summary>
    /// <param name="cefrLevel">Optional CEFR level filter.</param>
    /// <param name="partOfSpeech">Optional part-of-speech filter.</param>
    /// <param name="minLength">Optional minimum word length.</param>
    /// <param name="maxLength">Optional maximum word length.</param>
    /// <param name="limit">Requested result limit; clamped to the range 1..1000.</param>
    [HttpGet("search")]
    public async Task<ActionResult<List<OptionsVocabulary>>> SearchVocabularies(
        [FromQuery] string? cefrLevel = null,
        [FromQuery] string? partOfSpeech = null,
        [FromQuery] int? minLength = null,
        [FromQuery] int? maxLength = null,
        [FromQuery] int limit = 100)
    {
        var effectiveLimit = Math.Clamp(limit, 1, MaxSearchLimit);
        var vocabularies = await _vocabularyService.SearchVocabulariesAsync(
            cefrLevel, partOfSpeech, minLength, maxLength, effectiveLimit);
        return Ok(vocabularies);
    }

    /// <summary>
    /// Maps a request to an entity. The word is trimmed first so that surrounding whitespace
    /// is neither stored nor counted in <c>WordLength</c>.
    /// </summary>
    private static OptionsVocabulary ToEntity(AddVocabularyRequest request)
    {
        var word = request.Word.Trim();
        return new OptionsVocabulary
        {
            Word = word,
            CEFRLevel = request.CEFRLevel,
            PartOfSpeech = request.PartOfSpeech,
            WordLength = word.Length
        };
    }
}
📁 DTOs 定義
QuestionOptionsResponse
namespace DramaLing.Api.Models.DTOs;
/// <summary>
/// Response payload describing the options generated for one quiz question.
/// </summary>
public class QuestionOptionsResponse
{
    /// <summary>Type of the question the options were generated for.</summary>
    public string QuestionType { get; set; } = "";

    /// <summary>The options to present; empty by default.</summary>
    public string[] Options { get; set; } = Array.Empty<string>();

    /// <summary>The correct answer.</summary>
    public string CorrectAnswer { get; set; } = "";

    /// <summary>The word the question is about.</summary>
    public string TargetWord { get; set; } = "";

    /// <summary>CEFR level of the target word, if any.</summary>
    public string? CEFRLevel { get; set; }

    /// <summary>Part of speech of the target word, if any.</summary>
    public string? PartOfSpeech { get; set; }

    /// <summary>UTC timestamp; defaults to the moment the object is constructed.</summary>
    public DateTime GeneratedAt { get; set; } = DateTime.UtcNow;
}
AddVocabularyRequest
/// <summary>
/// Request body for adding a word to the options vocabulary.
/// </summary>
public class AddVocabularyRequest
{
    /// <summary>The word to add; required, at most 100 characters.</summary>
    [Required, MaxLength(100)]
    public string Word { get; set; } = "";

    /// <summary>CEFR level; required, must be exactly one of A1, A2, B1, B2, C1, C2.</summary>
    [Required, RegularExpression("^(A1|A2|B1|B2|C1|C2)$")]
    public string CEFRLevel { get; set; } = "";

    /// <summary>Part of speech; required, at most 20 characters.</summary>
    [Required, MaxLength(20)]
    public string PartOfSpeech { get; set; } = "";
}
💾 資料庫遷移
Migration 檔案
/// <summary>
/// Migration that creates the OptionsVocabularies table and its indexes.
/// </summary>
public partial class AddOptionsVocabularyTable : Migration
{
    private const string TableName = "OptionsVocabularies";

    /// <summary>
    /// Creates the table, its primary key, the unique Word index, the composite matching index
    /// and the IsActive index.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.CreateTable(
            name: TableName,
            columns: t => new
            {
                Id = t.Column<Guid>(nullable: false),
                Word = t.Column<string>(maxLength: 100, nullable: false),
                CEFRLevel = t.Column<string>(maxLength: 2, nullable: false),
                PartOfSpeech = t.Column<string>(maxLength: 20, nullable: false),
                WordLength = t.Column<int>(nullable: false),
                IsActive = t.Column<bool>(nullable: false, defaultValue: true),
                CreatedAt = t.Column<DateTime>(nullable: false),
                UpdatedAt = t.Column<DateTime>(nullable: false)
            },
            constraints: t => t.PrimaryKey("PK_OptionsVocabularies", row => row.Id));

        // Indexes.
        migrationBuilder.CreateIndex(
            name: "IX_OptionsVocabulary_Word",
            table: TableName,
            column: "Word",
            unique: true);

        migrationBuilder.CreateIndex(
            name: "IX_OptionsVocabulary_Core_Matching",
            table: TableName,
            columns: new[] { "CEFRLevel", "PartOfSpeech", "WordLength" });

        migrationBuilder.CreateIndex(
            name: "IX_OptionsVocabulary_Active",
            table: TableName,
            column: "IsActive");
    }

    /// <summary>
    /// Reverts the migration by dropping the table.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.DropTable(name: TableName);
    }
}
🔄 使用案例
案例 1:詞彙選擇題
目標詞彙: "beautiful" (B1, adjective, 9字元)
篩選條件:
- CEFR: A2, B1, B2 (相鄰等級)
- 詞性: adjective
- 字數: 7-11 字元
可能的干擾項:
- "wonderful" (B1, adjective, 9字元)
- "excellent" (B2, adjective, 9字元)
- "attractive" (B2, adjective, 10字元)
最終選項: ["beautiful", "wonderful", "excellent", "attractive"]
案例 2:聽力測驗
目標詞彙: "running" (A2, verb, 7字元)
篩選條件:
- CEFR: A1, A2, B1
- 詞性: verb
- 字數: 5-9 字元
可能的干擾項:
- "jumping" (A2, verb, 7字元)
- "walking" (A1, verb, 7字元)
- "playing" (A2, verb, 7字元)
最終選項: ["running", "jumping", "walking", "playing"]
⚡ 效能考量
查詢優化
- 複合索引:(CEFRLevel, PartOfSpeech, WordLength)
- 覆蓋索引:包含常用查詢欄位
- 分頁查詢:避免一次載入過多資料
快取策略
/// <summary>
/// Wraps distractor generation with a one-hour in-memory cache keyed by
/// target word, CEFR level and part of speech.
/// </summary>
public class CachedDistractorGenerationService
{
    private readonly IMemoryCache _cache;
    private readonly TimeSpan _cacheExpiry = TimeSpan.FromHours(1);

    /// <summary>
    /// Creates the service. Without this constructor the cache field was never assigned,
    /// so the first lookup would throw a <see cref="NullReferenceException"/>.
    /// </summary>
    public CachedDistractorGenerationService(IMemoryCache cache)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
    }

    /// <summary>
    /// Returns cached distractors for the given key when present; otherwise generates them and
    /// caches any non-empty result.
    /// </summary>
    /// <param name="targetWord">The word the distractors are generated for.</param>
    /// <param name="cefrLevel">CEFR level of the target word.</param>
    /// <param name="partOfSpeech">Part of speech of the target word.</param>
    /// <returns>A list owned by the caller; mutating it does not affect the cached entry.</returns>
    public async Task<List<string>> GenerateDistractorsAsync(string targetWord, string cefrLevel, string partOfSpeech)
    {
        var cacheKey = $"distractors:{targetWord}:{cefrLevel}:{partOfSpeech}";

        if (_cache.TryGetValue(cacheKey, out List<string>? cachedResult) && cachedResult is not null)
        {
            // Hand out a copy: List<string> is mutable and the cached instance is shared by all callers.
            return new List<string>(cachedResult);
        }

        // NOTE(review): GenerateDistractorsInternalAsync is not defined in the snippet shown — confirm where it lives.
        var result = await GenerateDistractorsInternalAsync(targetWord, cefrLevel, partOfSpeech);

        // Do not pin an empty result for the full expiry window; a later call may find matches
        // once the vocabulary has been extended.
        if (result.Count > 0)
        {
            _cache.Set(cacheKey, new List<string>(result), _cacheExpiry);
        }

        return result;
    }
}
效能指標
| 指標 | 目標值 | 監控方式 |
|---|---|---|
| API 回應時間 | < 100ms | Application Insights |
| 資料庫查詢時間 | < 50ms | EF Core 日誌 |
| 快取命中率 | > 80% | 自訂計數器 |
| 併發請求數 | > 1000 req/s | 負載測試 |
📊 初始資料建立
資料來源建議
- CEFR 詞彙表
  - Cambridge English Vocabulary Profile
  - Oxford 3000/5000 詞彙表
  - 各級別教材詞彙表
- 詞性標注
  - WordNet 資料庫
  - 英語詞性詞典
  - 語料庫分析結果
- 頻率評級
  - Google Ngram Corpus
  - Brown Corpus
  - 現代英語使用頻率統計
初始資料腳本
/// <summary>
/// Seeds the options vocabulary with an initial set of words.
/// </summary>
public class VocabularySeeder
{
    private readonly DramaLingDbContext _context;

    /// <summary>
    /// Creates the seeder. The original snippet used <c>_context</c> without declaring or assigning it.
    /// </summary>
    public VocabularySeeder(DramaLingDbContext context)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
    }

    /// <summary>
    /// Inserts the seed words that are not yet present. Safe to run more than once: words that
    /// already exist are skipped, so the unique index on Word is not violated by a re-run.
    /// </summary>
    public async Task SeedInitialVocabularyAsync()
    {
        var seeds = new List<OptionsVocabulary>
        {
            // A1 level - nouns
            Create("cat", "A1", "noun"),
            Create("dog", "A1", "noun"),
            Create("book", "A1", "noun"),
            // A1 level - verbs
            Create("eat", "A1", "verb"),
            Create("run", "A1", "verb"),
            Create("walk", "A1", "verb"),
            // B1 / B2 level - adjectives
            Create("beautiful", "B1", "adjective"),
            Create("wonderful", "B1", "adjective"),
            Create("excellent", "B2", "adjective"),
            // ... more vocabulary
        };

        var seedWords = seeds.Select(s => s.Word).ToList();
        var existingWords = await _context.OptionsVocabularies
            .Where(v => seedWords.Contains(v.Word))
            .Select(v => v.Word)
            .ToListAsync();

        // Case-insensitive so the skip also holds under a case-insensitive database collation.
        var existing = new HashSet<string>(existingWords, StringComparer.OrdinalIgnoreCase);
        var missing = seeds.Where(s => !existing.Contains(s.Word)).ToList();
        if (missing.Count == 0)
        {
            return;
        }

        await _context.OptionsVocabularies.AddRangeAsync(missing);
        await _context.SaveChangesAsync();
    }

    /// <summary>
    /// Builds a seed entry, deriving WordLength from the word instead of a hand-typed number.
    /// </summary>
    private static OptionsVocabulary Create(string word, string cefrLevel, string partOfSpeech) => new()
    {
        Word = word,
        CEFRLevel = cefrLevel,
        PartOfSpeech = partOfSpeech,
        WordLength = word.Length
    };
}
🔄 服務註冊
Startup.cs / Program.cs
// Register services.
builder.Services.AddScoped<IOptionsVocabularyService, OptionsVocabularyService>();
builder.Services.AddScoped<DistractorGenerationService>();

// In-memory cache.
builder.Services.AddMemoryCache();

// Background service (optional) — disabled.
// NOTE(review): VocabularyQualityScoreUpdateService is not defined anywhere in this spec, and the
// design-simplification note removes quality scoring, so this registration cannot compile as written.
// Re-enable it only if that background service is actually implemented.
// builder.Services.AddHostedService<VocabularyQualityScoreUpdateService>();
📈 品質保證
演算法驗證
- A/B 測試:比較新舊選項生成方式的學習效果
- 專家評審:語言學習專家評估選項品質
- 用戶回饋:收集學習者對選項難度的反饋
監控指標
/// <summary>
/// Snapshot of monitoring metrics for distractor generation.
/// </summary>
public class DistractorQualityMetrics
{
    /// <summary>Average response time (unit not specified here).</summary>
    public double AverageResponseTime { get; set; }

    /// <summary>Option variability (diversity of generated options).</summary>
    public double OptionVariability { get; set; }

    /// <summary>CEFR matching accuracy.</summary>
    public double CEFRLevelAccuracy { get; set; }

    /// <summary>User satisfaction score.</summary>
    public double UserSatisfactionScore { get; set; }

    /// <summary>Total number of distractors generated.</summary>
    public int TotalDistractorsGenerated { get; set; }

    /// <summary>When the metrics were measured.</summary>
    public DateTime MeasuredAt { get; set; }
}
🚀 實作階段規劃
Phase 1: 基礎實作 (1-2 週)
- 建立 OptionsVocabulary 實體和資料庫遷移
- 實作 OptionsVocabularyService 基礎功能
- 建立核心 API 端點
- 匯入初始詞彙資料(第一批 1000-5000 詞;後續需擴充至 NFR-002 要求的 10,000 詞以上)
Phase 2: 演算法優化 (1 週)
- 實作 DistractorGenerationService
- 新增同義詞排除邏輯(依「設計簡化說明」已移出本版範圍,列為未來擴充)
- 實作品質評分系統(依「設計簡化說明」已移出本版範圍,列為未來擴充)
- 加入快取機制
Phase 3: 前端整合 (3-5 天)
- 修改前端 generateOptions 函數
- 整合新的 API 端點
- 測試各種測驗類型
- 效能測試和優化
Phase 4: 進階功能 (1-2 週)
- 管理介面開發
- 批量匯入工具
- 監控和分析儀表板
- A/B 測試框架
📋 驗收標準
功能驗收
- 能根據 CEFR、詞性、字數生成合適的干擾項
- API 回應時間 < 100ms
- 生成的選項無重複
- 支援各種測驗類型
品質驗收
- 干擾項難度適中(不會太簡單或太困難)
- 無明顯的同義詞作為干擾項
- 拼寫差異合理(避免過於相似)
技術驗收
- 程式碼覆蓋率 > 80%
- 通過所有單元測試
- API 文檔完整
- 效能測試通過
🔒 安全性考量
資料保護
- 詞彙庫資料非敏感性,無特殊加密需求
- 管理 API 需要管理員權限驗證
- 防止 SQL 注入攻擊
API 安全
- 實作 Rate Limiting 防止濫用
- 輸入驗證和清理
- 錯誤訊息不洩露系統資訊
📚 相關文件
- 規格書完成日期: 2025-09-29
- 下次更新時間: 實作完成後