Spaces:
Sleeping
Sleeping
import os
import random
import sqlite3
from contextlib import closing

from tqdm import tqdm

from ai_sentence import generate_sentence
from vocab import get_words_from_source, get_word_info
# Location of the on-disk sentence cache database.
DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
# Initialize the database (create the table).
def init_db(db_path=None):
    """Create the ``sentences`` table if it does not already exist.

    Args:
        db_path: Optional path to the SQLite file; defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even if execute() raises
    # (the original leaked the connection on any exception before close()).
    with closing(sqlite3.connect(path)) as conn:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS sentences (
                word TEXT,
                phonetic TEXT,
                sentence TEXT,
                source TEXT,
                model TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (word, source, model)
            )
        ''')
        conn.commit()
# Look up all stored example sentences for one word.
def get_sentences_by_word(word, db_path=None):
    """Return every stored sentence row for *word*.

    Args:
        word: The vocabulary word to look up.
        db_path: Optional SQLite file path; defaults to ``DB_PATH``.

    Returns:
        list[tuple]: ``(word, phonetic, sentence, source, model)`` rows,
        empty when the word has no cached sentences.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even on error
    # (the original leaked the connection if execute() raised).
    with closing(sqlite3.connect(path)) as conn:
        cur = conn.execute(
            'SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?',
            (word,),
        )
        return cur.fetchall()
# Save a sentence to SQLite (upsert on the (word, source, model) key).
def save_sentence(word, phonetic, sentence, source, model, db_path=None):
    """Insert a sentence row, updating sentence/phonetic on key conflict.

    Args:
        word: Vocabulary word (part of the primary key).
        phonetic: Phonetic transcription (may be empty).
        sentence: Example sentence text.
        source: Sentence origin, e.g. ``"tatoeba"`` or ``"ai"`` (key part).
        model: Generating model name, empty for non-AI sources (key part).
        db_path: Optional SQLite file path; defaults to ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # closing() guarantees the connection is released even on error
    # (the original leaked the connection if execute() raised).
    with closing(sqlite3.connect(path)) as conn:
        conn.execute('''
            INSERT INTO sentences (word, phonetic, sentence, source, model)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
        ''', (word, phonetic, sentence, source, model))
        conn.commit()
# Randomly sample words, then fetch a sentence from the cache or generate one.
def get_words_with_sentences(source, n):
    """Sample *n* words from *source* and pair each with an example sentence.

    Sentences come from the local cache (preferring the ``"tatoeba"`` source);
    words with no cached sentence get one generated by the AI model. Each
    result is upserted back into the cache.

    Args:
        source: Vocabulary source key understood by ``get_words_from_source``.
        n: Number of words to sample (must not exceed the source size).

    Returns:
        tuple[str, str]: ``(html_fragment, status_message)``; on any
        exception both elements describe the error instead.
    """
    try:
        words = get_words_from_source(source)
        selected_words = random.sample(words, n)
        result_display = ""
        for word_data in tqdm(selected_words, desc="處理單字"):
            word = word_data['word']
            phonetic = word_data['phonetic']
            # Check the sentence cache first.
            sentence_records = get_sentences_by_word(word)
            if sentence_records:
                # Rows are (word, phonetic, sentence, source, model).
                # Prefer a Tatoeba record; otherwise fall back to the first.
                chosen = next(
                    (rec for rec in sentence_records if rec[3] == "tatoeba"),
                    sentence_records[0],
                )
                sentence = chosen[2]
                # Bug fix: read source/model from the *chosen* record. The
                # original always took them from sentence_records[0], so the
                # displayed source/model could mismatch the sentence shown.
                source_used = chosen[3]
                model_used = chosen[4]
            else:
                # No cached sentence: generate one with the AI model.
                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
                source_used = "ai"
                model_used = "EleutherAI/pythia-410m"
            # Backfill a missing phonetic from the vocab source.
            if not phonetic:
                word_info = get_word_info(source, word)
                phonetic = word_info['phonetic'] if word_info else ''
            # Upsert so generated sentences and backfilled phonetics persist.
            save_sentence(word, phonetic, sentence, source_used, model_used)
            result_display += f"""
            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
            <strong>單字:</strong> {word} <br>
            <strong>音標:</strong> {phonetic or '無'} <br>
            <strong>句子:</strong> {sentence} <br>
            <strong>來源:</strong> {source_used} {f"({model_used})" if model_used else ""}
            </div>
            """
        return result_display, f"✅ 成功抽取 {n} 個單字 & 句子"
    except Exception as e:
        return f"<p style='color:red;'>❌ 發生錯誤:{str(e)}</p>", f"❌ 錯誤:{str(e)}"