Spaces:
Sleeping
Sleeping
import os
import random
import sqlite3
from contextlib import closing

from tqdm import tqdm

from ai_sentence import generate_sentence
from vocab import get_words_from_source, get_word_info
# Location of the on-disk sentence cache database.
DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
# Initialize the database (create the table).
def init_db(db_path=None):
    """Create the ``sentences`` table if it does not already exist.

    Args:
        db_path: Optional path to the SQLite file; defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even if execute() raises
    # (the original leaked the connection on any exception before close()).
    with closing(sqlite3.connect(path)) as conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS sentences (
                word TEXT,
                phonetic TEXT,
                sentence TEXT,
                source TEXT,
                model TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (word, source, model)
            )
        ''')
        conn.commit()
# Look up all stored example sentences for one word.
def get_sentences_by_word(word, db_path=None):
    """Return every stored sentence row for *word*.

    Args:
        word: The vocabulary word to look up.
        db_path: Optional SQLite file path; defaults to ``DB_PATH``.

    Returns:
        list[tuple]: ``(word, phonetic, sentence, source, model)`` rows,
        empty when the word has no cached sentences.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even on error
    # (the original leaked the connection if execute() raised).
    with closing(sqlite3.connect(path)) as conn:
        cur = conn.execute(
            'SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?',
            (word,),
        )
        return cur.fetchall()
# Save a sentence to SQLite (upsert on the (word, source, model) key).
def save_sentence(word, phonetic, sentence, source, model, db_path=None):
    """Insert a sentence row, updating sentence/phonetic on key conflict.

    Args:
        word: Vocabulary word (part of the primary key).
        phonetic: Phonetic transcription (may be empty).
        sentence: Example sentence text.
        source: Sentence origin, e.g. ``"tatoeba"`` or ``"ai"`` (key part).
        model: Generating model name, empty for non-AI sources (key part).
        db_path: Optional SQLite file path; defaults to ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even on error
    # (the original leaked the connection if execute() raised).
    with closing(sqlite3.connect(path)) as conn:
        conn.execute('''
            INSERT INTO sentences (word, phonetic, sentence, source, model)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
        ''', (word, phonetic, sentence, source, model))
        conn.commit()
# Randomly sample words, then fetch a sentence from the cache or generate one.
def get_words_with_sentences(source, n):
    """Sample *n* words from *source* and pair each with an example sentence.

    Sentences come from the local cache (preferring the ``"tatoeba"`` source);
    words with no cached sentence get one generated by the AI model. Each
    result is upserted back into the cache.

    Args:
        source: Vocabulary source key understood by ``get_words_from_source``.
        n: Number of words to sample (must not exceed the source size).

    Returns:
        tuple[str, str]: ``(html_fragment, status_message)``; on any
        exception both elements describe the error instead.
    """
    try:
        words = get_words_from_source(source)
        selected_words = random.sample(words, n)
        result_display = ""
        for word_data in tqdm(selected_words, desc="處理單字"):
            word = word_data['word']
            phonetic = word_data['phonetic']
            # Check the sentence cache first.
            sentence_records = get_sentences_by_word(word)
            if sentence_records:
                # Rows are (word, phonetic, sentence, source, model).
                # Prefer a Tatoeba record; otherwise fall back to the first.
                chosen = next(
                    (rec for rec in sentence_records if rec[3] == "tatoeba"),
                    sentence_records[0],
                )
                sentence = chosen[2]
                # Bug fix: read source/model from the *chosen* record. The
                # original always took them from sentence_records[0], so the
                # displayed source/model could mismatch the sentence shown.
                source_used = chosen[3]
                model_used = chosen[4]
            else:
                # No cached sentence: generate one with the AI model.
                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
                source_used = "ai"
                model_used = "EleutherAI/pythia-410m"
            # Backfill a missing phonetic from the vocab source.
            if not phonetic:
                word_info = get_word_info(source, word)
                phonetic = word_info['phonetic'] if word_info else ''
            # Upsert so generated sentences and backfilled phonetics persist.
            save_sentence(word, phonetic, sentence, source_used, model_used)
            result_display += f"""
            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
            <strong>單字:</strong> {word} <br>
            <strong>音標:</strong> {phonetic or '無'} <br>
            <strong>句子:</strong> {sentence} <br>
            <strong>來源:</strong> {source_used} {f"({model_used})" if model_used else ""}
            </div>
            """
        return result_display, f"✅ 成功抽取 {n} 個單字 & 句子"
    except Exception as e:
        return f"<p style='color:red;'>❌ 發生錯誤:{str(e)}</p>", f"❌ 錯誤:{str(e)}"