study_prompt / app.py
陳聖勳
add app.py
0521fe1
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import re
import gradio as gr
import json, random
from util.reading_constants import READING_LEVEL_CONFIG, FIX_PREFERENCE_PROMPT, FIX_HIGH_LEVEL_PROMPT, FIX_LOW_LEVEL_PROMPT
from util.cloze_constants import CLOZE_LEVEL_CONFIG, CLOZE_FIX_PREFERENCE_PROMPT, CLOZE_FIX_HIGH_LEVEL_PROMPT, CLOZE_FIX_LOW_LEVEL_PROMPT
from util.sentence_dealer import Sentence_Dealer
from util.judgement import READING_JUDGE_PROMPTS, CLOZE_JUDGE_PROMPTS, QUALIFICATION_SCHEMA
def load_json(data_path: str):
    """Return the object decoded from the UTF-8 JSON file at ``data_path``."""
    with open(data_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)
# In[ ]:
data_folder_path = "./dataset/"
CHOICE_LABELS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'AA', 'AB', 'AC', 'AD', 'AE', 'AF', 'AG', 'AI', 'AK', 'AL', 'AM', 'AN', 'AP', 'AR', 'AS', 'AT', 'AU', 'AV', 'AW', 'AX', 'BA', 'BB', 'BC', 'BD', 'BE', 'BF', 'BI', 'BL', 'BO', 'BR', 'BS', 'BU', 'BY', 'CA', 'CB', 'CC', 'CD', 'CE', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CP', 'CR', 'CS', 'CT', 'CV', 'DA', 'DB', 'DC', 'DD', 'DE', 'DF', 'DI', 'DK', 'DL', 'DM', 'DN', 'DO', 'DP', 'DR', 'DS', 'DT', 'DU', 'EB', 'EC', 'ED', 'EE', 'EF', 'EG', 'EL', 'EM', 'EN', 'EP', 'EQ', 'ER', 'ES', 'ET', 'EV', 'EX', 'EY', 'FA', 'FB', 'FC', 'FD', 'FE', 'FF', 'FI', 'FL', 'FM', 'FO', 'FP', 'FR', 'FS', 'FT', 'FX', 'GA', 'GB', 'GC', 'GE', 'GG', 'GL', 'GM', 'GN', 'GO', 'GP', 'GR', 'GS', 'GT', 'GV', 'HA', 'HC', 'HD', 'HE', 'HH', 'HI', 'HL', 'HO', 'HP', 'HR', 'HS', 'HT', 'IA', 'IB', 'IC', 'ID', 'IE', 'IF', 'IG', 'II', 'IK', 'IL', 'IM', 'IN', 'IO', 'IP', 'IR', 'IS', 'IT', 'IV', 'IX', 'IZ', 'JB', 'JS', 'KB', 'KE', 'KS', 'LA', 'LC', 'LD', 'LE', 'LI', 'LL', 'LM', 'LO', 'LP', 'LR', 'LS', 'LT', 'LY', 'MA', 'MB', 'MC', 'MD', 'ME', 'MI', 'ML', 'MM', 'MO', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MW', 'MY', 'NA', 'NB', 'NC', 'ND', 'NE', 'NF', 'NG', 'NI', 'NL', 'NN', 'NO', 'NP', 'NR', 'NS', 'NT', 'NU', 'OB', 'OC', 'OD', 'OF', 'OH', 'OK', 'OL', 'OM', 'ON', 'OP', 'OR', 'OS', 'OT', 'OU', 'OW', 'PA', 'PC', 'PD', 'PE', 'PF', 'PG', 'PH', 'PI', 'PK', 'PL', 'PM', 'PN', 'PO', 'PP', 'PR', 'PS', 'PT', 'PU', 'PY', 'QL', 'QU', 'RA', 'RC', 'RE', 'RI', 'RL', 'RO', 'RS', 'RT', 'RU', 'RY', 'SA', 'SB', 'SC', 'SD', 'SE', 'SF', 'SG', 'SH', 'SI', 'SK', 'SL', 'SM', 'SN', 'SO', 'SP', 'SR', 'SS', 'ST', 'SU', 'SV', 'SW', 'SY', 'TA', 'TB', 'TC', 'TD', 'TE', 'TF', 'TH', 'TI', 'TL', 'TM', 'TO', 'TP', 'TR', 'TS', 'TT', 'TV', 'TW', 'TX', 'TY', 'UB', 'UC', 'UD', 'UE', 'UG', 'UI', 'UK', 'UL', 'UM', 'UN', 'UP', 'UR', 'US', 'UT', 'VA', 'VB', 'VC', 'VD', 'VE', 'VF', 'VI', 'VM', 'VO', 'VP', 
'VS', 'WA', 'WD', 'WE', 'WF', 'WH', 'WI', 'WM', 'WN', 'WP', 'WR', 'WS', 'WT', 'XT', 'XV', 'XX', 'XY', 'YS', 'YY', 'ZE', 'ABC', 'ACE', 'ACK', 'ACT', 'ADD', 'AGE', 'ALL', 'AME', 'AML', 'AMP', 'AND', 'ANG', 'ANT', 'API', 'APP', 'ARD', 'ARN', 'ART', 'ARY', 'ASC', 'ASE', 'ASH', 'ASS', 'AST', 'ATA', 'ATE', 'ATH', 'AUT', 'AVA', 'AXI', 'BER', 'BIT', 'BUG', 'CAA', 'CAT', 'CCE', 'CCN', 'CES', 'CLA', 'CLC', 'CLI', 'COL', 'COM', 'CON', 'CRE', 'CSS', 'CUR', 'DAT', 'DAY', 'DBC', 'DEF', 'DER', 'DEX', 'DIR', 'DIS', 'DOC', 'DOM', 'EAR', 'ECK', 'ECT', 'ELD', 'EMA', 'END', 'ENT', 'ENV', 'ERE', 'ERR', 'ERS', 'ERT', 'ERY', 'EXT', 'FIG', 'FIX', 'FLA', 'FOR', 'GEN', 'GER', 'GET', 'HER', 'IAB', 'IAL', 'ICE', 'IDE', 'IES', 'IGN', 'III', 'ILL', 'IMA', 'IME', 'IND', 'INE', 'INF', 'ING', 'INT', 'ION', 'IOS', 'ISO', 'IST', 'ITE', 'ITH', 'ITY', 'IVE', 'JAX', 'KEY', 'LAB', 'LAY', 'LED', 'LES', 'LIC', 'LIN', 'LOB', 'LOC', 'LOG', 'LOW', 'MAN', 'MAP', 'MAX', 'MIN', 'MIT', 'MON', 'NER', 'NET', 'NEW', 'NOT', 'NUM', 'OFF', 'OIN', 'ONE', 'ONG', 'OPT', 'ORD', 'ORM', 'ORS', 'ORT', 'ORY', 'OST', 'OUR', 'OUT', 'PAR', 'PDF', 'PER', 'PHP', 'POS', 'PRE', 'PRI', 'PRO', 'PUT', 'QUE', 'RAM', 'RAY', 'RED', 'REE', 'REF', 'REG', 'RES', 'RGB', 'RIG', 'ROM', 'ROP', 'ROR', 'ROW', 'SBN', 'SDK', 'SEE', 'SER', 'SET', 'SHA', 'SON', 'SQL', 'SSL', 'SSN', 'STR', 'SUB', 'SUM', 'TAC', 'TAG', 'TER', 'THE', 'UES', 'UID', 'ULL', 'ULT', 'UMN', 'UND', 'UNT', 'URE', 'URI', 'URL', 'URN', 'USA', 'USE', 'UST', 'UTC', 'UTE', 'UTF', 'VAL', 'VAR', 'VER', 'VID', 'VIS', 'WID', 'WIN', 'WOR', 'XML', 'XXX', 'YES', 'YPE']
# Grammar catalogue: one JSON object per line, indexed by its 'grammar_id' field.
with open(data_folder_path + 'grammars.jsonl', 'r', encoding='utf-8') as f:
    # Blank lines (for example a trailing empty line at the end of the file)
    # are skipped so json.loads is never handed an empty string.
    GRAMMAR_ID2INFO = {
        item['grammar_id']: item
        for item in (json.loads(line) for line in f if line.strip())
    }
# Word -> explanation entries. The second file is merged on top of the first,
# so for a word present in both, the second file's entries win.
word2explanation_infos = load_json(data_folder_path + "MOE_word2explanations_8.json")
word2explanation_infos.update(load_json(data_folder_path + "TMLD_word2explanations_3.json"))
# Mapping from main topic to its sub-topics.
subtopic_map = load_json(data_folder_path + "subtopic_map20251002v2.json")
# Shared Sentence_Dealer instance; change ckiptagger_path if the CKIP files live elsewhere.
dealer = Sentence_Dealer(ckiptagger_path="./CKIP/")
# In[3]:
#Modify the ckiptagger_path if you want
# Quick check of the grammar lister on a fixed sentence; runs at import time.
sample_sentence = """朝聞道夕死可矣"""
grammar_ids, grammar_range = dealer.list_all_grammars(sentence=sample_sentence)

# The first element of every range entry is used as the sort position.
grammar_starts = [span[0] for span in grammar_range]
# Pair every grammar id with its position and order the pairs by position.
grammar_id2range = sorted(
    zip(grammar_ids, grammar_starts),
    key=lambda pair: pair[1],
)
print(grammar_id2range)
# In[5]:
def remove_example(text):
    """Trim a dictionary explanation down to its first sentence.

    Steps, in order:
      1. Cut the text at a '《' marker: the second one when the text itself
         starts with '《', otherwise the first one.
      2. Drop everything from the first "如:" onwards.
      3. Keep the result up to and including the first '。'.

    When step 3 finds no '。' in the trimmed string, the ORIGINAL ``text`` is
    returned unchanged (not the trimmed string).
    """
    if text.startswith('《'):
        # Keep the leading 《...》 part and stop right before the next '《'.
        head = re.sub(r'(《.*?《).*', r'\1', text)
        head = head.rstrip('《')
    else:
        # Keep only what precedes the first '《'.
        head = re.sub(r'(.*?)《.*', r'\1', text)

    head = head.partition("如:")[0]

    period_at = head.find('。')
    if period_at == -1:
        return text
    return head[:period_at + 1]
def sentence_segmentation(text):
    """Split ``text`` on '。' and return the non-empty pieces.

    Each piece is stripped of surrounding whitespace and gets a '。' appended.
    """
    sentences = []
    for piece in text.split('。'):
        stripped = piece.strip()
        if stripped != '':
            sentences.append(stripped + '。')
    return sentences
# In[6]:
# Notebook leftover: the returned list is not stored or printed here.
sentence_segmentation(sample_sentence)
# In[7]:
## 為了方便理解,我們對任務做了簡化
# In[ ]:
# =========================
# 1. 函式定義區(先留空給你實作)
# =========================
def gen_textbook_prompt(
    tbcl_level: str,
    main_topic: str,
    sub_topic: str,
    extra_content: str,
    article_type: str,
) -> str:
    """Build a textbook-generation prompt.

    Args:
        tbcl_level: One of "第一級" .. "第六級".
        main_topic: A key of the module-level ``subtopic_map``.
        sub_topic: Optional sub-topic; only used when it is listed under
            ``main_topic`` in ``subtopic_map``.
        extra_content: Optional free text appended as an extra requirement.
        article_type: "短文" or "對話".

    Returns:
        The prompt string. When one of the three validated arguments is not
        allowed, an error message string (starting with "❌") is returned
        instead of raising.
    """
    # Template pools (the original comment points to inference_codes/extras/prompts.py).
    pure_templates = [
        "請以「{topics}」為主題,撰寫一篇 TBCL {level} 級的華語{textbook_type}課文。",
        "請完成一篇有關「{topics}」且 TBCL 分級為 {level} 級的華語課文,課文形式為{textbook_type}。",
        "以「{topics}」為題,編寫一篇華語課文。\nTBCL 分級為 {level} 級,要求格式為{textbook_type}",
        "請提供一篇關於「{topics}」的課文。\n課文形式為{textbook_type},TBCL 分級為 {level} 級。",
        "生成一篇難度為 TBCL {level} 級的{textbook_type}華語課文,內容需與「{topics}」相關",
        "請針對「{topics}」寫出一篇華語{textbook_type}課文,難度必須符合 TBCL {level} 級。",
        "請撰寫一篇華語課文。\n類型:{textbook_type}\n主題:{topics}\nTBCL 分級:{level}",
        "請根據「{topics}」主題,撰寫一篇符合 TBCL {level} 級標準的華語{textbook_type}課文。",
    ]
    subtopic_templates = [
        "請以「{topics}」為主題,選擇一至多個邏輯相關的子主題(如:{subtopics}),撰寫一篇TBCL {level}級的華語{textbook_type}課文。請確保選擇的面向能自然地融入內容中。",
        "以「{topics}」為主軸,從此主題的相關面向中選擇一至多個相互關聯的元素,編寫一篇TBCL {level}級的{textbook_type}課文。內容須流暢地整合這些面向。",
        "撰寫一篇TBCL {level}級的華語{textbook_type}課文。主題為「{topics}」,請自由選擇一至多個相關子主題(參考但不限於:{subtopics})。所選面向須具邏輯關聯,並在課文中自然呈現。",
        "請以「{topics}」為核心主題,從相關子主題(如{subtopics})中挑選一至多個具關聯性的元素,編寫一篇TBCL {level}級的{textbook_type}課文。要求各面向之間的過渡自然,內容連貫。",
        "以「{topics}」為主題創作一篇TBCL {level}級華語{textbook_type}課文。請自選一至多個相關連的子主題(可參考:{subtopics}),並設計合適的情境來呈現這些面向。",
    ]

    # Argument validation: (given value, allowed values, label used in the message).
    checks = (
        (tbcl_level, ["第一級", "第二級", "第三級", "第四級", "第五級", "第六級"], "TBCL 等級"),
        (main_topic, list(subtopic_map.keys()), "主題"),
        (article_type, ["短文", "對話"], "文章類型"),
    )
    for given, allowed, label in checks:
        if given not in allowed:
            return f"❌ 錯誤:{label}必須是以下之一:{', '.join(allowed)}"

    fields = {
        "topics": main_topic,
        "level": tbcl_level,
        "textbook_type": article_type,
    }

    # Sub-topic mode needs a non-empty sub-topic that belongs to the main topic.
    use_subtopics = bool(sub_topic) and sub_topic in subtopic_map.get(main_topic, [])
    if not use_subtopics:
        prompt = random.choice(pure_templates).format(**fields)
    else:
        template = random.choice(subtopic_templates)
        # The chosen sub-topic always comes first, followed by up to four
        # randomly drawn siblings offered as additional references.
        siblings = [name for name in subtopic_map[main_topic] if name != sub_topic]
        hints = [sub_topic] + random.sample(siblings, min(4, len(siblings)))
        prompt = template.format(subtopics="、".join(hints), **fields)

    # Optional extra requirements go at the very end.
    extra = extra_content.strip() if extra_content else ""
    if extra:
        prompt += f"\n\n【額外要求】\n{extra}"
    return prompt
def gen_word_sense_prompt(
    sentence: str,
    target_word: str,
) -> str:
    """Build a word-sense-disambiguation prompt.

    The first occurrence of ``target_word`` in ``sentence`` is wrapped in
    「」 and every explanation stored for the word in
    ``word2explanation_infos`` becomes one lettered option (labels come from
    ``CHOICE_LABELS``; the option text is ``remove_example`` applied to the
    first field of the explanation entry).

    Returns:
        The prompt, or a short message when the word is missing from the
        sentence or from the dictionary.
    """
    if target_word not in sentence:
        return "此詞彙未出現在句子中。"
    if target_word not in word2explanation_infos:
        return "此詞彙未出現在字典。"

    # Mark only the first occurrence of the word.
    start = sentence.find(target_word)
    end = start + len(target_word)
    marked_sentence = f"{sentence[:start]}「{target_word}」{sentence[end:]}"

    option_lines = [
        CHOICE_LABELS[position] + ". " + remove_example(info[0])
        for position, info in enumerate(word2explanation_infos[target_word])
    ]

    return "請判斷「%s」在以下句子中為何種解釋,並直接輸出正確的選項代號。\n%s\n\n%s" % (
        target_word,
        marked_sentence,
        "\n".join(option_lines),
    )
def gen_word_info(sentence: str, target_word: str, target_choice: str):
    """Return the dictionary details of one sense of ``target_word``.

    Args:
        sentence: Sentence that must contain ``target_word``.
        target_word: Word to look up in ``word2explanation_infos``.
        target_choice: Option label (an entry of ``CHOICE_LABELS``) naming the
            sense. Surrounding whitespace and letter case are ignored.

    Returns:
        One "column: value" line per column, joined by newlines, or a short
        message when the word or the option cannot be resolved.
    """
    columns = ["釋義", "TBCL等級", "注音", "漢語拼音", "詞性", "英文翻譯", "例句", "例句漢語拼音", "例句翻譯"]
    if target_word not in sentence:
        return "此詞彙未出現在句子中。"
    if target_word not in word2explanation_infos:
        return "此詞彙未出現在字典。"

    # All labels are upper-case, so " a " is accepted as "A".
    choice = (target_choice or "").strip().upper()
    if choice not in CHOICE_LABELS:
        return "請填寫純英文代號。"

    ans_id = CHOICE_LABELS.index(choice)
    explanations = word2explanation_infos[target_word]
    # A valid label can still point past the senses this word actually has;
    # answer with a message instead of raising IndexError in the UI callback.
    if ans_id >= len(explanations):
        return "此選項不存在。"

    entry = explanations[ans_id]
    # First six fields plus the first item of field 6.
    # NOTE(review): presumably field 6 is a list of examples, each holding
    # (sentence, pinyin, translation) — confirm against the dataset.
    explanation_ans = entry[:6] + entry[6][0]
    return "\n".join(
        "%s: %s" % (column, value)
        for column, value in zip(columns, explanation_ans)
    )
def gen_grammar_analysis_prompt(
    textbook: str,
) -> str:
    """Build a grammar-analysis prompt for ``textbook``.

    The text is split into sentences with ``sentence_segmentation``; for each
    sentence ``dealer.list_all_grammars`` supplies candidate grammar ids and
    their ranges. Candidates are ordered by the first element of their range,
    flattened (an id may itself be a list of ids), de-duplicated while keeping
    first-seen order, and listed as lettered options with the name and usage
    text found in ``GRAMMAR_ID2INFO``.

    Returns:
        The prompt string.
    """
    chat_template = "請閱讀以下句子並根據解釋選出句子中有使用到的語法,請直接輸出選項代號,若有多個答案則使用頓號(、)分隔。\n%s\n\n"
    prompt = chat_template % (textbook,)

    candidate_ids = []
    for sentence in sentence_segmentation(textbook):
        # Use the ranges returned for THIS sentence. Previously the second
        # return value was discarded and the module-level ``grammar_range``
        # of the sample sentence was used instead.
        grammar_ids, grammar_range = dealer.list_all_grammars(sentence)
        grammar_starts = [span[0] for span in grammar_range]
        ordered_ids = [
            grammar_id
            for grammar_id, _ in sorted(zip(grammar_ids, grammar_starts), key=lambda pair: pair[1])
        ]
        for grammar_id in ordered_ids:
            if isinstance(grammar_id, list):
                candidate_ids.extend(grammar_id)
            else:
                candidate_ids.append(grammar_id)

    # De-duplicate before labelling so the option letters are consecutive
    # (A, B, C, ...) instead of skipping a letter for every dropped duplicate.
    unique_ids = list(dict.fromkeys(candidate_ids))
    for position, grammar_id in enumerate(unique_ids):
        detail = GRAMMAR_ID2INFO[grammar_id]
        prompt += f"{CHOICE_LABELS[position]}. {detail['grammar_name']}: \n{detail['way_to_use_zh']}\n\n"

    # Drop one of the two trailing newlines.
    return prompt[:-1]
def gen_reading_test_prompt(
pre_text: str,
tocfl_level: str,
style: str,
) -> str:
"""
閱讀測驗生成 prompt 產生器
"""
level_map = {"入門基礎":"A-入門基礎", "進階高階":"B-進階高階", "流利精通":"C-流利精通"}
level = level_map[tocfl_level]
level_config = READING_LEVEL_CONFIG[level]
system_prompt = level_config["system_prompt"]
definition_prompt = level_config["definition_prompt"]
asking_prompt_templates = level_config["asking_prompt_templates"]
reference_asking_prompt_template = level_config["reference_asking_prompt_template"]
topic_classes = level_config["topic_classes"]
style_distribution = level_config["style_distribution"]
#conversation_distribution = level_config["conversation_distribution"]
question_type_distribution = level_config["question_type_distribution"]
json_schema = level_config["json_schema"]
prompt = system_prompt + "\n"
question_type = None
# --- 抽 topic(等機率) ---
topic_class = random.choice(topic_classes)
# --- 按機率抽 style ---
#styles, style_probs = zip(*style_distribution.items())
#style = random.choices(styles, weights=style_probs, k=1)[0]
#conversation_types, conversation_types = zip(*conversation_distribution.items())
#conversation = random.choices(conversation_types, weights=conversation_types, k=1)[0]
#if conversation == "含對話":
# style = style + ("(含對話)")
#else:
# style = style + ("(純敘述)")
# --- 按機率抽 question_type ---
if question_type_distribution is not None:
qtypes, q_probs = zip(*question_type_distribution.items())
question_type = random.choices(qtypes, weights=q_probs, k=1)[0]
if question_type:
reference_asking_prompt = reference_asking_prompt_template.safe_substitute(style=style, question_type=question_type, content=pre_text)
else:
reference_asking_prompt = reference_asking_prompt_template.safe_substitute(style=style, content=pre_text)
response_rule = "【json_schema 輸出格式】"+"\n"+ str(json_schema)
return prompt+definition_prompt+reference_asking_prompt+"\n\n"+response_rule
def gen_cloze_prompts(
pre_text: str,
tocfl_level: str,
) -> str:
"""
選詞填空生成 prompt 產生器
"""
level_map = {"入門基礎":"A-入門基礎", "進階高階":"B-進階高階", "流利精通":"C-流利精通"}
level = level_map[tocfl_level]
level_config = CLOZE_LEVEL_CONFIG[level]
word_list = level_config["word_list"]
grammar_list = level_config["grammar_list"]
all_patterns = level_config["all_patterns"]
examples = level_config["examples"]
system_prompt = level_config["system_prompt"]
prompt_definition_template = level_config["prompt_definition_template"]
asking_prompt_1 = level_config["asking_prompt_1"]
reference_asking_prompt_1_template = level_config["reference_asking_prompt_1_template"]
perference_prompts = level_config["perference_prompts"]
asking_prompt_2_template = level_config["asking_prompt_2_template"]
json_schema = level_config["json_schema"]
word_text = "、".join(random.sample(word_list, min(30, len(word_list))))
grammar_text = "、".join(random.sample(grammar_list, min(10, len(grammar_list))))
pattern_text = "、".join(random.sample(all_patterns, min(9, len(all_patterns))))
prompt_definition = prompt_definition_template.safe_substitute(level_word_list=word_text, level_grammar_list=grammar_text, level_pattern_list=pattern_text, level_examples=examples)
pre_text_prompt = "【前置課文】" + "\n" + pre_text
asking_prompt = "請思考讀者學習完【前置課文】後的克漏字填空出題方向,其中克漏字填空裡的[完整文章]是【前置課文】的延伸,兩者需保持一定的相關性,但不需重複。請先根據【要求】的規範生成不帶任何空格的[完整文章]。"
chat_prompt1 = system_prompt + "\n" + prompt_definition + pre_text_prompt + '\n' + asking_prompt
chat_prompt2 = asking_prompt_2_template.safe_substitute(level_preference1=perference_prompts[0], level_preference2=perference_prompts[1], patterns_text=pattern_text)
return chat_prompt1, chat_prompt2
# In[ ]:
# =========================
# 2. Gradio UI 定義
# =========================
# Choices offered by the dropdowns below.
TBCL_LEVELS = ["第一級", "第二級", "第三級", "第四級", "第五級", "第六級"]
TOCFL_LEVELS = ["入門基礎", "進階高階", "流利精通"]
ARTICLE_STYLES = ["記敘文", "議論文", "説明文", "抒情文"]
ARTICLE_TYPES = ["短文", "對話"]
MAIN_TOPICS = list(subtopic_map.keys())  # main topics are the keys of subtopic_map
NEED_FIX_STATUS = ["生成題目TOCFL等級過高", "生成題目TOCFL等級過低", "生成題目品質不過關"]

with gr.Blocks(title="教育任務 Prompt 產生器") as demo:
    gr.Markdown("# 🧩 教育任務 Prompt 產生器")

    # ---- Tab 1: textbook generation ----
    with gr.Tab("課文生成"):
        tbcl_level_in = gr.Dropdown(
            choices=TBCL_LEVELS,
            value="第一級",
            label="TBCL 等級",
        )
        main_topic_in = gr.Dropdown(
            choices=MAIN_TOPICS,
            value=MAIN_TOPICS[0],
            label="主題",
        )
        sub_topic_in = gr.Dropdown(
            # Start with the sub-topics of the initially selected main topic;
            # the change handler below only runs once the main topic changes.
            choices=subtopic_map.get(MAIN_TOPICS[0], []),
            value=None,
            label="副主題(選填)",
            interactive=True,
        )
        extra_content_in = gr.Textbox(
            lines=5,
            label="額外內容",
            placeholder="可填寫想加入的設定、情境說明等(選填)",
        )
        article_type_in = gr.Dropdown(
            choices=ARTICLE_TYPES,
            value="短文",
            label="文章類型",
        )
        gen_button_1 = gr.Button("產生課文生成 Prompt")
        output_1 = gr.Textbox(
            lines=10,
            label="課文生成 Prompt(string)",
        )

        def _update_subtopics(main_topic):
            """Refresh the sub-topic choices after the main topic changed."""
            if main_topic in subtopic_map:
                subtopics = subtopic_map[main_topic]
                return gr.Dropdown(choices=subtopics, value=None)
            return gr.Dropdown(choices=[], value=None)

        def _on_gen_textbook(tbcl_level, main_topic, sub_topic, extra_content, article_type):
            return gen_textbook_prompt(
                tbcl_level=tbcl_level,
                main_topic=main_topic,
                sub_topic=sub_topic,
                extra_content=extra_content,
                article_type=article_type,
            )

        # Keep the sub-topic dropdown in sync with the selected main topic.
        main_topic_in.change(
            _update_subtopics,
            inputs=[main_topic_in],
            outputs=[sub_topic_in],
        )
        gen_button_1.click(
            _on_gen_textbook,
            inputs=[tbcl_level_in, main_topic_in, sub_topic_in, extra_content_in, article_type_in],
            outputs=output_1,
        )

    # ---- Tab 2: word sense disambiguation ----
    with gr.Tab("詞意消歧"):
        sentence_in = gr.Textbox(
            lines=5,
            label="句子",
            placeholder="請輸入包含目標詞彙的完整句子",
        )
        target_word_in = gr.Textbox(
            lines=1,
            label="詞彙",
            placeholder="請輸入要進行詞意消歧的詞彙",
        )
        gen_button_2 = gr.Button("產生詞意消歧 Prompt")
        output_2 = gr.Textbox(
            lines=8,
            label="詞意消歧 Prompt(string)",
        )
        target_choice_in = gr.Textbox(
            lines=1,
            label="選項",
            placeholder="請輸入正確的選項",
        )
        gen_button_2_2 = gr.Button("產生詞彙釋義相關資訊")
        # Named after gen_button_2_2; this textbox used to share the name
        # ``output_3`` with the grammar tab's output.
        output_2_2 = gr.Textbox(
            lines=10,
            label="詞彙釋義資訊",
        )

        def _on_gen_word_sense(sentence, target_word):
            return gen_word_sense_prompt(sentence=sentence, target_word=target_word)

        gen_button_2.click(
            _on_gen_word_sense,
            inputs=[sentence_in, target_word_in],
            outputs=output_2,
        )

        def _on_gen_word_info(sentence, target_word, target_choice):
            return gen_word_info(sentence=sentence, target_word=target_word, target_choice=target_choice)

        gen_button_2_2.click(
            _on_gen_word_info,
            inputs=[sentence_in, target_word_in, target_choice_in],
            outputs=output_2_2,
        )

    # ---- Tab 3: grammar analysis ----
    with gr.Tab("課文語法分析"):
        textbook_in = gr.Textbox(
            lines=10,
            label="課文",
            placeholder="請貼上完整課文內容",
        )
        gen_button_3 = gr.Button("產生課文語法分析 Prompt")
        output_3 = gr.Textbox(
            lines=8,
            label="課文語法分析 Prompt(string)",
        )

        def _on_gen_grammar(textbook):
            return gen_grammar_analysis_prompt(textbook=textbook)

        gen_button_3.click(
            _on_gen_grammar,
            inputs=[textbook_in],
            outputs=output_3,
        )

    # ---- Tab 4: reading test generation ----
    with gr.Tab("閱讀測驗生成"):
        pre_text_in = gr.Textbox(
            lines=10,
            label="前置課文",
            placeholder="請貼上作為閱讀測驗基礎的課文",
        )
        tocfl_level_in_1 = gr.Dropdown(
            choices=TOCFL_LEVELS,
            value="入門基礎",
            label="TOCFL 等級",
        )
        article_style_in = gr.Dropdown(
            choices=ARTICLE_STYLES,
            value="記敘文",
            label="題目文章文體",
        )
        gen_button_4 = gr.Button("產生閱讀測驗 Prompt")
        output_4 = gr.Textbox(
            lines=8,
            label="閱讀測驗生成 Prompt(string)",
        )
        judgement_level_status_in = gr.Dropdown(
            choices=NEED_FIX_STATUS,
            # The default must be one of the choices; the previous default
            # "題目等級過高" matched none of them, so the handler returned None.
            value=NEED_FIX_STATUS[0],
            label="遇到需要修正的評估結果",
        )
        gen_button_4_2 = gr.Button("產生修正等級的 Prompt")
        output_4_2 = gr.Textbox(
            lines=1,
            label="修正Prompt生成",
        )

        def _on_gen_reading_test(pre_text, tocfl_level, style):
            return gen_reading_test_prompt(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
                style=style,
            )

        gen_button_4.click(
            _on_gen_reading_test,
            inputs=[pre_text_in, tocfl_level_in_1, article_style_in],
            outputs=output_4,
        )

        gr.Markdown("## 品質評估用的prompt_1\n" + READING_JUDGE_PROMPTS.system + "\n\n[待評估之課文]")
        gr.Markdown("## 品質評估用的prompt_2\n" + READING_JUDGE_PROMPTS.additional_check)
        gr.Markdown("## 品質評估用的prompt_3 (格式化輸出)\n" + "【json_schema 輸出格式】\n\n" + str(READING_JUDGE_PROMPTS.json_schema))

        def _on_gen_reading_fix(judgement_level_status):
            """Return the fix prompt for the selected status (None when unknown)."""
            fix_prompts = {
                "生成題目TOCFL等級過高": FIX_HIGH_LEVEL_PROMPT,
                "生成題目TOCFL等級過低": FIX_LOW_LEVEL_PROMPT,
                "生成題目品質不過關": FIX_PREFERENCE_PROMPT,
            }
            return fix_prompts.get(judgement_level_status)

        gen_button_4_2.click(
            _on_gen_reading_fix,
            inputs=[judgement_level_status_in],
            outputs=output_4_2,
        )

    # ---- Tab 5: cloze generation ----
    with gr.Tab("選詞填空生成"):
        pre_text_in_2 = gr.Textbox(
            lines=10,
            label="前置課文",
            placeholder="請貼上作為選詞填空基礎的課文",
        )
        tocfl_level_in_2 = gr.Dropdown(
            # The first TOCFL level is not offered for cloze generation.
            choices=TOCFL_LEVELS[1:],
            value="進階高階",
            label="TOCFL 等級",
        )
        gen_button_5 = gr.Button("產生完整文章生成 Prompt")
        output_5 = gr.Textbox(
            lines=8,
            label="選詞填空完整文章生成 Prompt(string)",
        )
        gen_button_5_2 = gr.Button("產生選詞填空出題 Prompt")
        output_5_2 = gr.Textbox(
            lines=8,
            label="選詞填空題目生成 Prompt(string)",
        )
        gr.Markdown("## 品質評估用的prompt_1\n" + CLOZE_JUDGE_PROMPTS.system + "\n\n[待評估之課文]")
        gr.Markdown("## 品質評估用的prompt_2\n" + CLOZE_JUDGE_PROMPTS.additional_check)
        gr.Markdown("## 品質評估用的prompt_3 (格式化輸出)\n" + "【json_schema 輸出格式】\n\n" + str(CLOZE_JUDGE_PROMPTS.json_schema))

        def _on_gen_cloze_complete_text(pre_text, tocfl_level):
            return gen_cloze_prompts(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
            )[0]

        gen_button_5.click(
            _on_gen_cloze_complete_text,
            inputs=[pre_text_in_2, tocfl_level_in_2],
            outputs=output_5,
        )

        def _on_gen_cloze_text(pre_text, tocfl_level):
            return gen_cloze_prompts(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
            )[1]

        # The second button drives the second output. It used to be left
        # unwired because gen_button_5 was bound a second time instead.
        # NOTE(review): each click calls gen_cloze_prompts again, which
        # re-samples the pattern list, so the patterns in this prompt can
        # differ from those in the first prompt — confirm whether they should
        # be shared.
        gen_button_5_2.click(
            _on_gen_cloze_text,
            inputs=[pre_text_in_2, tocfl_level_in_2],
            outputs=output_5_2,
        )
# In[10]:
# Run directly (python this_file.py) to start the Gradio app.
# NOTE(review): share=True is passed to launch(); per the Gradio docs this
# requests a public share link — confirm that is intended for deployment.
if __name__ == "__main__":
    demo.launch(share=True)
# In[11]:
# Column names of a word-list table; not referenced by any other code visible in this file.
WORD_LIST_COLUMN_NAMES = ["繁體中文", "TBCL等級", "注音", "漢語拼音", "詞性", "英文翻譯", "例句", "例句漢語拼音", "例句翻譯", "位置", "釋義", "釋義ID"]
# In[ ]: