Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # In[1]: | |
| import re | |
| import gradio as gr | |
| import json, random | |
| from util.reading_constants import READING_LEVEL_CONFIG, FIX_PREFERENCE_PROMPT, FIX_HIGH_LEVEL_PROMPT, FIX_LOW_LEVEL_PROMPT | |
| from util.cloze_constants import CLOZE_LEVEL_CONFIG, CLOZE_FIX_PREFERENCE_PROMPT, CLOZE_FIX_HIGH_LEVEL_PROMPT, CLOZE_FIX_LOW_LEVEL_PROMPT | |
| from util.sentence_dealer import Sentence_Dealer | |
| from util.judgement import READING_JUDGE_PROMPTS, CLOZE_JUDGE_PROMPTS, QUALIFICATION_SCHEMA | |
def load_json(data_path: str):
    """Read a UTF-8 encoded JSON file and return the decoded content.

    Args:
        data_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file.
    """
    with open(data_path, encoding="utf-8") as handle:
        return json.load(handle)
| # In[ ]: | |
# Folder holding the data files read at import time. File names are appended
# with plain string concatenation, so the trailing slash is required.
data_folder_path = "./dataset/"
# Option labels for multiple-choice prompts: CHOICE_LABELS[i] labels the i-th
# option, and a label typed back by the user is located with .index().
# NOTE(review): this is not a plain A, B, ..., AA, AB sequence (e.g. 'AH' and
# 'AJ' are absent) - presumably a curated set; confirm before regenerating it.
CHOICE_LABELS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'AA', 'AB', 'AC', 'AD', 'AE', 'AF', 'AG', 'AI', 'AK', 'AL', 'AM', 'AN', 'AP', 'AR', 'AS', 'AT', 'AU', 'AV', 'AW', 'AX', 'BA', 'BB', 'BC', 'BD', 'BE', 'BF', 'BI', 'BL', 'BO', 'BR', 'BS', 'BU', 'BY', 'CA', 'CB', 'CC', 'CD', 'CE', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CP', 'CR', 'CS', 'CT', 'CV', 'DA', 'DB', 'DC', 'DD', 'DE', 'DF', 'DI', 'DK', 'DL', 'DM', 'DN', 'DO', 'DP', 'DR', 'DS', 'DT', 'DU', 'EB', 'EC', 'ED', 'EE', 'EF', 'EG', 'EL', 'EM', 'EN', 'EP', 'EQ', 'ER', 'ES', 'ET', 'EV', 'EX', 'EY', 'FA', 'FB', 'FC', 'FD', 'FE', 'FF', 'FI', 'FL', 'FM', 'FO', 'FP', 'FR', 'FS', 'FT', 'FX', 'GA', 'GB', 'GC', 'GE', 'GG', 'GL', 'GM', 'GN', 'GO', 'GP', 'GR', 'GS', 'GT', 'GV', 'HA', 'HC', 'HD', 'HE', 'HH', 'HI', 'HL', 'HO', 'HP', 'HR', 'HS', 'HT', 'IA', 'IB', 'IC', 'ID', 'IE', 'IF', 'IG', 'II', 'IK', 'IL', 'IM', 'IN', 'IO', 'IP', 'IR', 'IS', 'IT', 'IV', 'IX', 'IZ', 'JB', 'JS', 'KB', 'KE', 'KS', 'LA', 'LC', 'LD', 'LE', 'LI', 'LL', 'LM', 'LO', 'LP', 'LR', 'LS', 'LT', 'LY', 'MA', 'MB', 'MC', 'MD', 'ME', 'MI', 'ML', 'MM', 'MO', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MW', 'MY', 'NA', 'NB', 'NC', 'ND', 'NE', 'NF', 'NG', 'NI', 'NL', 'NN', 'NO', 'NP', 'NR', 'NS', 'NT', 'NU', 'OB', 'OC', 'OD', 'OF', 'OH', 'OK', 'OL', 'OM', 'ON', 'OP', 'OR', 'OS', 'OT', 'OU', 'OW', 'PA', 'PC', 'PD', 'PE', 'PF', 'PG', 'PH', 'PI', 'PK', 'PL', 'PM', 'PN', 'PO', 'PP', 'PR', 'PS', 'PT', 'PU', 'PY', 'QL', 'QU', 'RA', 'RC', 'RE', 'RI', 'RL', 'RO', 'RS', 'RT', 'RU', 'RY', 'SA', 'SB', 'SC', 'SD', 'SE', 'SF', 'SG', 'SH', 'SI', 'SK', 'SL', 'SM', 'SN', 'SO', 'SP', 'SR', 'SS', 'ST', 'SU', 'SV', 'SW', 'SY', 'TA', 'TB', 'TC', 'TD', 'TE', 'TF', 'TH', 'TI', 'TL', 'TM', 'TO', 'TP', 'TR', 'TS', 'TT', 'TV', 'TW', 'TX', 'TY', 'UB', 'UC', 'UD', 'UE', 'UG', 'UI', 'UK', 'UL', 'UM', 'UN', 'UP', 'UR', 'US', 'UT', 'VA', 'VB', 'VC', 'VD', 'VE', 'VF', 'VI', 'VM', 'VO', 'VP', 
'VS', 'WA', 'WD', 'WE', 'WF', 'WH', 'WI', 'WM', 'WN', 'WP', 'WR', 'WS', 'WT', 'XT', 'XV', 'XX', 'XY', 'YS', 'YY', 'ZE', 'ABC', 'ACE', 'ACK', 'ACT', 'ADD', 'AGE', 'ALL', 'AME', 'AML', 'AMP', 'AND', 'ANG', 'ANT', 'API', 'APP', 'ARD', 'ARN', 'ART', 'ARY', 'ASC', 'ASE', 'ASH', 'ASS', 'AST', 'ATA', 'ATE', 'ATH', 'AUT', 'AVA', 'AXI', 'BER', 'BIT', 'BUG', 'CAA', 'CAT', 'CCE', 'CCN', 'CES', 'CLA', 'CLC', 'CLI', 'COL', 'COM', 'CON', 'CRE', 'CSS', 'CUR', 'DAT', 'DAY', 'DBC', 'DEF', 'DER', 'DEX', 'DIR', 'DIS', 'DOC', 'DOM', 'EAR', 'ECK', 'ECT', 'ELD', 'EMA', 'END', 'ENT', 'ENV', 'ERE', 'ERR', 'ERS', 'ERT', 'ERY', 'EXT', 'FIG', 'FIX', 'FLA', 'FOR', 'GEN', 'GER', 'GET', 'HER', 'IAB', 'IAL', 'ICE', 'IDE', 'IES', 'IGN', 'III', 'ILL', 'IMA', 'IME', 'IND', 'INE', 'INF', 'ING', 'INT', 'ION', 'IOS', 'ISO', 'IST', 'ITE', 'ITH', 'ITY', 'IVE', 'JAX', 'KEY', 'LAB', 'LAY', 'LED', 'LES', 'LIC', 'LIN', 'LOB', 'LOC', 'LOG', 'LOW', 'MAN', 'MAP', 'MAX', 'MIN', 'MIT', 'MON', 'NER', 'NET', 'NEW', 'NOT', 'NUM', 'OFF', 'OIN', 'ONE', 'ONG', 'OPT', 'ORD', 'ORM', 'ORS', 'ORT', 'ORY', 'OST', 'OUR', 'OUT', 'PAR', 'PDF', 'PER', 'PHP', 'POS', 'PRE', 'PRI', 'PRO', 'PUT', 'QUE', 'RAM', 'RAY', 'RED', 'REE', 'REF', 'REG', 'RES', 'RGB', 'RIG', 'ROM', 'ROP', 'ROR', 'ROW', 'SBN', 'SDK', 'SEE', 'SER', 'SET', 'SHA', 'SON', 'SQL', 'SSL', 'SSN', 'STR', 'SUB', 'SUM', 'TAC', 'TAG', 'TER', 'THE', 'UES', 'UID', 'ULL', 'ULT', 'UMN', 'UND', 'UNT', 'URE', 'URI', 'URL', 'URN', 'USA', 'USE', 'UST', 'UTC', 'UTE', 'UTF', 'VAL', 'VAR', 'VER', 'VID', 'VIS', 'WID', 'WIN', 'WOR', 'XML', 'XXX', 'YES', 'YPE']
# Grammar catalogue: one JSON object per line, indexed by its 'grammar_id'.
with open(data_folder_path + 'grammars.jsonl', 'r', encoding='utf-8') as f:
    GRAMMAR_ID2INFO = {
        grammar_entry['grammar_id']: grammar_entry
        for grammar_entry in map(json.loads, f)
    }

# Word -> sense entries. The MOE file is loaded first and the TMLD file is
# layered on top, so a word present in both keeps the TMLD value.
word2explanation_infos = {
    **load_json(data_folder_path + "MOE_word2explanations_8.json"),
    **load_json(data_folder_path + "TMLD_word2explanations_3.json"),
}

# Main topic -> sub-topics mapping.
subtopic_map = load_json(data_folder_path + "subtopic_map20251002v2.json")

# Shared sentence analyser used to list the grammars found in a sentence.
dealer = Sentence_Dealer(ckiptagger_path="./CKIP/")
| # In[3]: | |
# NOTE: adjust ckiptagger_path in the Sentence_Dealer(...) call above if your CKIP directory is located elsewhere.
# Sample sentence run through the grammar lister when the module is loaded.
sample_sentence = """朝聞道夕死可矣"""
grammar_ids, grammar_range = dealer.list_all_grammars(sentence=sample_sentence)
# In[4]:
# Pair every grammar id with the first element of its range entry, ordered by
# that element, and print the pairs.
grammar_starts = [span[0] for span in grammar_range]
grammar_id2range = sorted(zip(grammar_ids, grammar_starts), key=lambda pair: pair[1])
print(grammar_id2range)
| # In[5]: | |
def remove_example(text):
    """Trim a dictionary explanation to its first sentence, dropping examples.

    Trimming steps, applied in order:
      1. Cut at the first '《'; when the text itself starts with '《', cut at
         the second one instead so the leading title is kept.
      2. Cut at the first "如:" marker.
      3. Keep everything up to and including the first '。'.

    Args:
        text: Explanation text of one word sense.

    Returns:
        The trimmed explanation. When no '。' remains after steps 1-2 the
        trimmed text is returned as is (previously the untrimmed input came
        back, which re-introduced the examples). The original text is only
        returned when trimming would leave nothing at all.
    """
    if text.startswith('《'):
        # Keep the leading title and stop right before the next '《'.
        trimmed = re.sub(r'(《.*?《).*', r'\1', text).rstrip('《')
    else:
        # Keep only what precedes the first '《'.
        trimmed = re.sub(r'(.*?)《.*', r'\1', text)
    # Drop the example list introduced by "如:".
    trimmed = trimmed.split("如:")[0]

    period_at = trimmed.find('。')
    if period_at != -1:
        return trimmed[:period_at + 1]
    # No sentence end found: keep the trimmed text unless it is empty.
    return trimmed or text
def sentence_segmentation(text):
    """Split text on '。' into a list of sentences.

    Each piece is stripped of surrounding whitespace, empty pieces are
    dropped, and every kept piece gets a '。' appended (including a final
    piece that had no '。' in the input).
    """
    stripped_pieces = (piece.strip() for piece in text.split('。'))
    return [piece + '。' for piece in stripped_pieces if piece]
| # In[6]: | |
# Notebook leftover: the returned list is not stored or printed here.
sentence_segmentation(sample_sentence)
| # In[7]: | |
| ## 為了方便理解,我們對任務做了簡化 | |
| # In[ ]: | |
# =========================
# 1. Prompt generator functions
# =========================
def gen_textbook_prompt(
    tbcl_level: str,
    main_topic: str,
    sub_topic: str,
    extra_content: str,
    article_type: str,
) -> str:
    """Build a prompt that asks for a Chinese textbook passage.

    Args:
        tbcl_level: TBCL level label, one of "第一級" .. "第六級".
        main_topic: Main topic; must be a key of ``subtopic_map``.
        sub_topic: Optional sub-topic. When it is non-empty and listed under
            ``main_topic`` a sub-topic template is used, otherwise a
            topic-only template.
        extra_content: Optional free text appended as an extra requirement.
        article_type: "短文" or "對話".

    Returns:
        The prompt built from a randomly chosen template, or an error message
        string naming the allowed values for the first invalid argument.
    """
    # Templates that mention the main topic only.
    PURE_TEMPLATES = [
        "請以「{topics}」為主題,撰寫一篇 TBCL {level} 級的華語{textbook_type}課文。",
        "請完成一篇有關「{topics}」且 TBCL 分級為 {level} 級的華語課文,課文形式為{textbook_type}。",
        "以「{topics}」為題,編寫一篇華語課文。\nTBCL 分級為 {level} 級,要求格式為{textbook_type}",
        "請提供一篇關於「{topics}」的課文。\n課文形式為{textbook_type},TBCL 分級為 {level} 級。",
        "生成一篇難度為 TBCL {level} 級的{textbook_type}華語課文,內容需與「{topics}」相關",
        "請針對「{topics}」寫出一篇華語{textbook_type}課文,難度必須符合 TBCL {level} 級。",
        "請撰寫一篇華語課文。\n類型:{textbook_type}\n主題:{topics}\nTBCL 分級:{level}",
        "請根據「{topics}」主題,撰寫一篇符合 TBCL {level} 級標準的華語{textbook_type}課文。",
    ]
    # Templates that also refer to sub-topics (one of them does not use the
    # {subtopics} field; str.format ignores the unused keyword).
    SUBTOPIC_TEMPLATES = [
        "請以「{topics}」為主題,選擇一至多個邏輯相關的子主題(如:{subtopics}),撰寫一篇TBCL {level}級的華語{textbook_type}課文。請確保選擇的面向能自然地融入內容中。",
        "以「{topics}」為主軸,從此主題的相關面向中選擇一至多個相互關聯的元素,編寫一篇TBCL {level}級的{textbook_type}課文。內容須流暢地整合這些面向。",
        "撰寫一篇TBCL {level}級的華語{textbook_type}課文。主題為「{topics}」,請自由選擇一至多個相關子主題(參考但不限於:{subtopics})。所選面向須具邏輯關聯,並在課文中自然呈現。",
        "請以「{topics}」為核心主題,從相關子主題(如{subtopics})中挑選一至多個具關聯性的元素,編寫一篇TBCL {level}級的{textbook_type}課文。要求各面向之間的過渡自然,內容連貫。",
        "以「{topics}」為主題創作一篇TBCL {level}級華語{textbook_type}課文。請自選一至多個相關連的子主題(可參考:{subtopics}),並設計合適的情境來呈現這些面向。",
    ]

    # Argument validation: (value, allowed values, name used in the message).
    checks = (
        (tbcl_level, ["第一級", "第二級", "第三級", "第四級", "第五級", "第六級"], "TBCL 等級"),
        (main_topic, list(subtopic_map.keys()), "主題"),
        (article_type, ["短文", "對話"], "文章類型"),
    )
    for value, allowed, field_name in checks:
        if value not in allowed:
            return f"❌ 錯誤:{field_name}必須是以下之一:{', '.join(allowed)}"

    common_fields = {
        "topics": main_topic,
        "level": tbcl_level,
        "textbook_type": article_type,
    }

    use_subtopics = bool(sub_topic) and sub_topic in subtopic_map.get(main_topic, [])
    if not use_subtopics:
        # Topic-only mode.
        prompt = random.choice(PURE_TEMPLATES).format(**common_fields)
    else:
        # Sub-topic mode: the chosen sub-topic comes first, followed by up to
        # four other randomly sampled sub-topics of the same topic.
        template = random.choice(SUBTOPIC_TEMPLATES)
        companions = [name for name in subtopic_map[main_topic] if name != sub_topic]
        listed = [sub_topic] + random.sample(companions, min(4, len(companions)))
        prompt = template.format(subtopics="、".join(listed), **common_fields)

    # Append the optional extra requirement block.
    extra = extra_content.strip() if extra_content else ""
    if extra:
        prompt += f"\n\n【額外要求】\n{extra}"
    return prompt
def gen_word_sense_prompt(
    sentence: str,
    target_word: str,
) -> str:
    """Build a word-sense disambiguation prompt for one word in a sentence.

    The first occurrence of ``target_word`` in ``sentence`` is wrapped in
    「」 and every dictionary sense of the word is listed as a labelled option
    (label from ``CHOICE_LABELS``, text shortened by ``remove_example``).

    Returns:
        The prompt, or a message string when the word is missing from the
        sentence or from ``word2explanation_infos``.
    """
    if target_word not in sentence:
        return "此詞彙未出現在句子中。"
    if target_word not in word2explanation_infos:
        return "此詞彙未出現在字典。"

    # Mark the first occurrence of the word.
    start = sentence.index(target_word)
    end = start + len(target_word)
    marked_sentence = sentence[:start] + '「' + target_word + '」' + sentence[end:]

    # One "<label>. <explanation>" line per sense.
    option_lines = [
        CHOICE_LABELS[position] + ". " + remove_example(sense_info[0])
        for position, sense_info in enumerate(word2explanation_infos[target_word])
    ]

    chat_template = "請判斷「%s」在以下句子中為何種解釋,並直接輸出正確的選項代號。\n%s\n\n%s"
    return chat_template % (target_word, marked_sentence, "\n".join(option_lines))
def gen_word_info(sentence: str, target_word: str, target_choice: str) -> str:
    """Format the dictionary details of one sense of a word.

    Args:
        sentence: Sentence that must contain ``target_word``.
        target_word: Word to look up in ``word2explanation_infos``.
        target_choice: Option label (as listed in ``CHOICE_LABELS``) of the
            sense to show. Surrounding whitespace and letter case are ignored.

    Returns:
        One "<column>: <value>" line per available column, or a message string
        when the word is not in the sentence, not in the dictionary, the label
        is unknown, or the label does not correspond to a sense of this word.
    """
    columns = ["釋義", "TBCL等級", "注音", "漢語拼音", "詞性", "英文翻譯", "例句", "例句漢語拼音", "例句翻譯"]
    if target_word not in sentence:
        return "此詞彙未出現在句子中。"
    if target_word not in word2explanation_infos:
        return "此詞彙未出現在字典。"

    # All labels are upper-case, so " b " and "b" are treated like "B".
    choice = (target_choice or "").strip().upper()
    if choice not in CHOICE_LABELS:
        return "請填寫純英文代號。"

    ans_id = CHOICE_LABELS.index(choice)
    explanations = word2explanation_infos[target_word]
    # A valid label may still point past the senses of this particular word;
    # report that instead of raising IndexError inside the UI callback.
    if ans_id >= len(explanations):
        return "此選項不在該詞彙的釋義選項中。"

    entry = explanations[ans_id]
    # entry[:6] holds the first six columns; entry[6] is indexed as a sequence
    # whose first item supplies the three example columns.
    # NOTE(review): assumes entry[6] is a list of example records - confirm
    # against the dataset files.
    examples = entry[6]
    first_example = examples[0] if examples else []
    values = list(entry[:6]) + list(first_example)

    # zip stops at the shorter side, so missing example columns are omitted.
    return "\n".join("%s: %s" % (column, value) for column, value in zip(columns, values))
def gen_grammar_analysis_prompt(
    textbook: str,
) -> str:
    """Build a prompt asking which grammars are used in a text.

    The text is split into sentences, ``dealer.list_all_grammars`` is run on
    each one, and every distinct grammar id found is listed once as a labelled
    option with its name and usage description from ``GRAMMAR_ID2INFO``.

    Args:
        textbook: Full text to analyse.

    Returns:
        The prompt: instruction, the text, then one option block per grammar.

    Raises:
        KeyError: If a returned grammar id is missing from ``GRAMMAR_ID2INFO``.
        IndexError: If there are more distinct grammars than ``CHOICE_LABELS``.
    """
    chat_template = "請閱讀以下句子並根據解釋選出句子中有使用到的語法,請直接輸出選項代號,若有多個答案則使用頓號(、)分隔。\n%s\n\n"
    prompt = chat_template % (textbook,)

    candidate_ids = []
    for sentence in sentence_segmentation(textbook):
        sentence_ids, sentence_ranges = dealer.list_all_grammars(sentence)
        # Order this sentence's grammars by where they start. The ranges must
        # be the ones returned for THIS sentence; the previous code read the
        # module-level `grammar_range` of the sample sentence instead, so ids
        # were paired with unrelated positions and truncated by zip().
        starts = [span[0] for span in sentence_ranges]
        ordered_ids = [
            grammar_id
            for grammar_id, _ in sorted(zip(sentence_ids, starts), key=lambda pair: pair[1])
        ]
        for grammar_id in ordered_ids:
            # One match may carry several grammar ids as a list.
            if isinstance(grammar_id, list):
                candidate_ids.extend(grammar_id)
            else:
                candidate_ids.append(grammar_id)

    # Deduplicate (keeping first-seen order) BEFORE assigning labels so the
    # options read A, B, C, ... without gaps where duplicates were skipped.
    unique_ids = list(dict.fromkeys(candidate_ids))
    for position, grammar_id in enumerate(unique_ids):
        detail = GRAMMAR_ID2INFO[grammar_id]
        prompt += f"{CHOICE_LABELS[position]}. {detail['grammar_name']}: \n{detail['way_to_use_zh']}\n\n"

    # Drop one of the trailing newlines.
    return prompt[:-1]
def gen_reading_test_prompt(
    pre_text: str,
    tocfl_level: str,
    style: str,
) -> str:
    """Build a prompt that asks for a reading-comprehension test.

    Args:
        pre_text: Source passage the test is based on.
        tocfl_level: One of "入門基礎", "進階高階", "流利精通".
        style: Article style inserted into the request template.

    Returns:
        System prompt, definition prompt, the filled-in request and the JSON
        schema section, concatenated into one string.

    Raises:
        KeyError: If ``tocfl_level`` is not one of the three known levels or
            the level is missing from ``READING_LEVEL_CONFIG``.
    """
    level_map = {"入門基礎":"A-入門基礎", "進階高階":"B-進階高階", "流利精通":"C-流利精通"}
    level_config = READING_LEVEL_CONFIG[level_map[tocfl_level]]

    substitutions = {"style": style, "content": pre_text}

    # Draw a question type according to the configured weights, when the level
    # defines any. A missing or empty distribution leaves the placeholder
    # untouched (safe_substitute does not fail on missing keys).
    question_type_distribution = level_config["question_type_distribution"]
    if question_type_distribution:
        qtypes, q_probs = zip(*question_type_distribution.items())
        question_type = random.choices(qtypes, weights=q_probs, k=1)[0]
        if question_type:
            substitutions["question_type"] = question_type

    reference_asking_prompt = level_config["reference_asking_prompt_template"].safe_substitute(**substitutions)
    response_rule = "【json_schema 輸出格式】" + "\n" + str(level_config["json_schema"])

    return (
        level_config["system_prompt"] + "\n"
        + level_config["definition_prompt"]
        + reference_asking_prompt
        + "\n\n"
        + response_rule
    )
def gen_cloze_prompts(
    pre_text: str,
    tocfl_level: str,
) -> "tuple[str, str]":
    """Build the two prompts used to generate a cloze (fill-in-the-blank) test.

    Args:
        pre_text: Source passage the cloze article should extend.
        tocfl_level: One of "入門基礎", "進階高階", "流利精通".

    Returns:
        ``(chat_prompt1, chat_prompt2)``: the first asks for the complete
        article, the second asks for the cloze questions. Both are built from
        the same randomly sampled pattern list, so they should be taken from
        a single call when they are meant to be used together.

    Raises:
        KeyError: If ``tocfl_level`` is not one of the three known levels or
            the level is missing from ``CLOZE_LEVEL_CONFIG``.
    """
    level_map = {"入門基礎":"A-入門基礎", "進階高階":"B-進階高階", "流利精通":"C-流利精通"}
    level_config = CLOZE_LEVEL_CONFIG[level_map[tocfl_level]]

    word_list = level_config["word_list"]
    grammar_list = level_config["grammar_list"]
    all_patterns = level_config["all_patterns"]
    # "perference" is the spelling of the config key.
    perference_prompts = level_config["perference_prompts"]

    # Random samples (capped at the list length) shown as reference material.
    word_text = "、".join(random.sample(word_list, min(30, len(word_list))))
    grammar_text = "、".join(random.sample(grammar_list, min(10, len(grammar_list))))
    pattern_text = "、".join(random.sample(all_patterns, min(9, len(all_patterns))))

    prompt_definition = level_config["prompt_definition_template"].safe_substitute(
        level_word_list=word_text,
        level_grammar_list=grammar_text,
        level_pattern_list=pattern_text,
        level_examples=level_config["examples"],
    )
    pre_text_prompt = "【前置課文】" + "\n" + pre_text
    asking_prompt = "請思考讀者學習完【前置課文】後的克漏字填空出題方向,其中克漏字填空裡的[完整文章]是【前置課文】的延伸,兩者需保持一定的相關性,但不需重複。請先根據【要求】的規範生成不帶任何空格的[完整文章]。"

    chat_prompt1 = level_config["system_prompt"] + "\n" + prompt_definition + pre_text_prompt + '\n' + asking_prompt
    chat_prompt2 = level_config["asking_prompt_2_template"].safe_substitute(
        level_preference1=perference_prompts[0],
        level_preference2=perference_prompts[1],
        patterns_text=pattern_text,
    )
    return chat_prompt1, chat_prompt2
| # In[ ]: | |
| # ========================= | |
| # 2. Gradio UI 定義 | |
| # ========================= | |
# Choices offered by the UI widgets.
TBCL_LEVELS = [
    "第一級",
    "第二級",
    "第三級",
    "第四級",
    "第五級",
    "第六級",
]
TOCFL_LEVELS = [
    "入門基礎",
    "進階高階",
    "流利精通",
]
ARTICLE_STYLES = [
    "記敘文",
    "議論文",
    "説明文",
    "抒情文",
]
ARTICLE_TYPES = [
    "短文",
    "對話",
]
# Main topics are the keys of subtopic_map.
MAIN_TOPICS = list(subtopic_map.keys())
# Evaluation outcomes for which a follow-up "fix" prompt is available.
NEED_FIX_STATUS = [
    "生成題目TOCFL等級過高",
    "生成題目TOCFL等級過低",
    "生成題目品質不過關",
]
# Gradio UI: one tab per task. Each tab collects inputs, and a button click
# calls the matching prompt generator and shows the returned text.
with gr.Blocks(title="教育任務 Prompt 產生器") as demo:
    gr.Markdown("# 🧩 教育任務 Prompt 產生器")

    # ---- Tab 1: textbook passage prompt ----
    with gr.Tab("課文生成"):
        # Topic selected when the page opens (None if no topic is configured).
        _default_topic = MAIN_TOPICS[0] if MAIN_TOPICS else None

        tbcl_level_in = gr.Dropdown(
            choices=TBCL_LEVELS,
            value="第一級",
            label="TBCL 等級",
        )
        main_topic_in = gr.Dropdown(
            choices=MAIN_TOPICS,
            value=_default_topic,
            label="主題",
        )
        sub_topic_in = gr.Dropdown(
            # Start with the sub-topics of the default topic; the change
            # handler below only runs after the user picks another topic, so
            # an empty initial list made the default topic's sub-topics
            # unreachable.
            choices=list(subtopic_map.get(_default_topic, [])),
            value=None,
            label="副主題(選填)",
            interactive=True,
        )
        extra_content_in = gr.Textbox(
            lines=5,
            label="額外內容",
            placeholder="可填寫想加入的設定、情境說明等(選填)",
        )
        article_type_in = gr.Dropdown(
            choices=ARTICLE_TYPES,
            value="短文",
            label="文章類型",
        )
        gen_button_1 = gr.Button("產生課文生成 Prompt")
        output_1 = gr.Textbox(
            lines=10,
            label="課文生成 Prompt(string)",
        )

        def _update_subtopics(main_topic):
            """Refresh the sub-topic choices when the main topic changes."""
            if main_topic in subtopic_map:
                subtopics = subtopic_map[main_topic]
                return gr.Dropdown(choices=subtopics, value=None)
            return gr.Dropdown(choices=[], value=None)

        def _on_gen_textbook(tbcl_level, main_topic, sub_topic, extra_content, article_type):
            return gen_textbook_prompt(
                tbcl_level=tbcl_level,
                main_topic=main_topic,
                sub_topic=sub_topic,
                extra_content=extra_content,
                article_type=article_type,
            )

        main_topic_in.change(
            _update_subtopics,
            inputs=[main_topic_in],
            outputs=[sub_topic_in],
        )
        gen_button_1.click(
            _on_gen_textbook,
            inputs=[tbcl_level_in, main_topic_in, sub_topic_in, extra_content_in, article_type_in],
            outputs=output_1,
        )

    # ---- Tab 2: word-sense disambiguation prompt + sense details ----
    with gr.Tab("詞意消歧"):
        sentence_in = gr.Textbox(
            lines=5,
            label="句子",
            placeholder="請輸入包含目標詞彙的完整句子",
        )
        target_word_in = gr.Textbox(
            lines=1,
            label="詞彙",
            placeholder="請輸入要進行詞意消歧的詞彙",
        )
        gen_button_2 = gr.Button("產生詞意消歧 Prompt")
        output_2 = gr.Textbox(
            lines=8,
            label="詞意消歧 Prompt(string)",
        )
        target_choice_in = gr.Textbox(
            lines=1,
            label="選項",
            placeholder="請輸入正確的選項",
        )
        gen_button_2_2 = gr.Button("產生詞彙釋義相關資訊")
        # Named after its button; it used to share the name `output_3` with
        # the grammar tab's textbox defined further down.
        output_2_2 = gr.Textbox(
            lines=10,
            label="詞彙釋義資訊",
        )

        def _on_gen_word_sense(sentence, target_word):
            return gen_word_sense_prompt(sentence=sentence, target_word=target_word)

        gen_button_2.click(
            _on_gen_word_sense,
            inputs=[sentence_in, target_word_in],
            outputs=output_2,
        )

        def _on_gen_word_info(sentence, target_word, target_choice):
            return gen_word_info(sentence=sentence, target_word=target_word, target_choice=target_choice)

        gen_button_2_2.click(
            _on_gen_word_info,
            inputs=[sentence_in, target_word_in, target_choice_in],
            outputs=output_2_2,
        )

    # ---- Tab 3: grammar analysis prompt ----
    with gr.Tab("課文語法分析"):
        textbook_in = gr.Textbox(
            lines=10,
            label="課文",
            placeholder="請貼上完整課文內容",
        )
        gen_button_3 = gr.Button("產生課文語法分析 Prompt")
        output_3 = gr.Textbox(
            lines=8,
            label="課文語法分析 Prompt(string)",
        )

        def _on_gen_grammar(textbook):
            return gen_grammar_analysis_prompt(textbook=textbook)

        gen_button_3.click(
            _on_gen_grammar,
            inputs=[textbook_in],
            outputs=output_3,
        )

    # ---- Tab 4: reading test prompt + fix prompts ----
    with gr.Tab("閱讀測驗生成"):
        pre_text_in = gr.Textbox(
            lines=10,
            label="前置課文",
            placeholder="請貼上作為閱讀測驗基礎的課文",
        )
        tocfl_level_in_1 = gr.Dropdown(
            choices=TOCFL_LEVELS,
            value="入門基礎",
            label="TOCFL 等級",
        )
        article_style_in = gr.Dropdown(
            choices=ARTICLE_STYLES,
            value="記敘文",
            label="題目文章文體",
        )
        gen_button_4 = gr.Button("產生閱讀測驗 Prompt")
        output_4 = gr.Textbox(
            lines=8,
            label="閱讀測驗生成 Prompt(string)",
        )
        judgement_level_status_in = gr.Dropdown(
            choices=NEED_FIX_STATUS,
            # The default must be one of the choices; the former default
            # "題目等級過高" was not in NEED_FIX_STATUS.
            value=NEED_FIX_STATUS[0],
            label="遇到需要修正的評估結果",
        )
        gen_button_4_2 = gr.Button("產生修正等級的 Prompt")
        output_4_2 = gr.Textbox(
            lines=1,
            label="修正Prompt生成",
        )

        def _on_gen_reading_test(pre_text, tocfl_level, style):
            return gen_reading_test_prompt(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
                style=style,
            )

        gen_button_4.click(
            _on_gen_reading_test,
            inputs=[pre_text_in, tocfl_level_in_1, article_style_in],
            outputs=output_4,
        )

        # Static judge prompts shown for copy/paste.
        gr.Markdown("## 品質評估用的prompt_1\n" + READING_JUDGE_PROMPTS.system + "\n\n[待評估之課文]")
        gr.Markdown("## 品質評估用的prompt_2\n" + READING_JUDGE_PROMPTS.additional_check)
        gr.Markdown("## 品質評估用的prompt_3 (格式化輸出)\n" +"【json_schema 輸出格式】\n\n"+ str(READING_JUDGE_PROMPTS.json_schema))

        def _on_gen_reading_fix(judgement_level_status):
            """Return the fix prompt for the selected status (None if unknown)."""
            fix_prompts = {
                "生成題目TOCFL等級過高": FIX_HIGH_LEVEL_PROMPT,
                "生成題目TOCFL等級過低": FIX_LOW_LEVEL_PROMPT,
                "生成題目品質不過關": FIX_PREFERENCE_PROMPT,
            }
            return fix_prompts.get(judgement_level_status)

        gen_button_4_2.click(
            _on_gen_reading_fix,
            inputs=[judgement_level_status_in],
            outputs=output_4_2,
        )

    # ---- Tab 5: cloze test prompts ----
    with gr.Tab("選詞填空生成"):
        pre_text_in_2 = gr.Textbox(
            lines=10,
            label="前置課文",
            placeholder="請貼上作為選詞填空基礎的課文",
        )
        tocfl_level_in_2 = gr.Dropdown(
            # The first TOCFL level is not offered for cloze tests.
            choices=TOCFL_LEVELS[1:],
            value="進階高階",
            label="TOCFL 等級",
        )
        gen_button_5 = gr.Button("產生完整文章生成 Prompt")
        output_5 = gr.Textbox(
            lines=8,
            label="選詞填空完整文章生成 Prompt(string)",
        )
        gen_button_5_2 = gr.Button("產生選詞填空出題 Prompt")
        output_5_2 = gr.Textbox(
            lines=8,
            label="選詞填空題目生成 Prompt(string)",
        )
        gr.Markdown("## 品質評估用的prompt_1\n" + CLOZE_JUDGE_PROMPTS.system + "\n\n[待評估之課文]")
        gr.Markdown("## 品質評估用的prompt_2\n" + CLOZE_JUDGE_PROMPTS.additional_check)
        gr.Markdown("## 品質評估用的prompt_3 (格式化輸出)\n" +"【json_schema 輸出格式】\n\n"+ str(CLOZE_JUDGE_PROMPTS.json_schema))

        # NOTE(review): each handler calls gen_cloze_prompts() on its own, so
        # the randomly sampled pattern list in the two prompts can differ.
        def _on_gen_cloze_complete_text(pre_text, tocfl_level):
            return gen_cloze_prompts(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
            )[0]

        gen_button_5.click(
            _on_gen_cloze_complete_text,
            inputs=[pre_text_in_2, tocfl_level_in_2],
            outputs=output_5,
        )

        def _on_gen_cloze_text(pre_text, tocfl_level):
            return gen_cloze_prompts(
                pre_text=pre_text,
                tocfl_level=tocfl_level,
            )[1]

        # Wired to the second button; it was bound to gen_button_5 again,
        # which left gen_button_5_2 without any action.
        gen_button_5_2.click(
            _on_gen_cloze_text,
            inputs=[pre_text_in_2, tocfl_level_in_2],
            outputs=output_5_2,
        )
| # In[10]: | |
# Script entry point: running `python this_file.py` launches the demo;
# importing the module only builds `demo`.
if __name__ == "__main__":
    demo.launch(share=True)
| # In[11]: | |
# Column names of a word-list table.
# NOTE(review): not referenced anywhere in the visible part of this file -
# confirm whether it is still needed.
WORD_LIST_COLUMN_NAMES = ["繁體中文", "TBCL等級", "注音", "漢語拼音", "詞性", "英文翻譯", "例句", "例句漢語拼音", "例句翻譯", "位置", "釋義", "釋義ID"]
| # In[ ]: | |