XXiao commited on
Commit
b99eae5
·
1 Parent(s): df2999f

last update

Browse files
app.py CHANGED
@@ -64,7 +64,7 @@ API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
64
  API_KEY = os.getenv("API_KEY", "")
65
  USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
66
  USE_MOCK_TTS = os.getenv("USE_MOCK_TTS", "0") == "1"
67
- CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
68
  TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
69
  TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
70
  TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
@@ -94,58 +94,59 @@ QWEN_VL_MCQ_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MCQ_MAX_NEW_TOKENS", "1800")
94
 
95
 
96
  DEFAULT_LECTURE_PROMPT_TEMPLATE = """
97
- 你是一名课程助教。请阅读用户上传的论文内容,并输出一段中文讲解,要求:
98
- 1. 先说明论文要解决的问题和背景;
99
- 2. 再解释核心方法(按步骤/模块);
100
- 3. 再总结实验结果或亮点;
101
- 4. 最后给出局限性与适用场景;
102
- 5. 语言清晰,适合课堂讲解(约 400-700 字)。
103
-
104
- 论文内容(可能是节选):
105
  {document}
106
  """.strip()
107
 
108
 
109
  DEFAULT_MCQ_PROMPT_TEMPLATE = """
110
- 请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
111
- 严格输出 JSON(不要 markdown 代码块),格式如下:
112
  {{
113
  "questions": [
114
  {{
115
  "question": "...",
116
- "options": ["A选项", "B选项", "C选项", "D选项"],
117
  "answer": "A",
118
  "explanation": "..."
119
  }}
120
  ]
121
  }}
122
 
123
- 要求:
124
- 1. 5 题;
125
- 2. 每题 4 个选项;
126
- 3. answer 必须是 A/B/C/D
127
- 4. 解析要说明为什么正确,以及常见误区;
128
- 5. 题目应覆盖问题背景、方法、实验/结果、局限性。
129
 
130
- 论文内容(可能是节选):
131
  {document}
132
  """.strip()
133
 
134
 
135
  DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE = """
136
- 基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。
 
137
 
138
- 限制:
139
- 1. 必须是紧凑 JSON(单行也可以);
140
- 2. 5 题;
141
- 3. 每题 questionoptions(4)answer(A/B/C/D)explanation
142
- 4. 解析简短(1-2句),避免过长;
143
- 5. 如果不确定,仍按论文内容出题,不要输出额外文字。
144
 
145
- 输出格式:
146
  {{"questions":[{{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}}]}}
147
 
148
- 论文内容:
149
  {document}
150
  """.strip()
151
 
@@ -189,6 +190,10 @@ def load_character_configs() -> Dict[str, Dict[str, Any]]:
189
  if cid in configs:
190
  cid = d.name
191
  avatar_rel = str(meta.get("avatar", "avatar.jpg"))
 
 
 
 
192
  config: Dict[str, Any] = {
193
  "id": cid,
194
  "display_name": str(meta.get("display_name", d.name)),
@@ -198,17 +203,25 @@ def load_character_configs() -> Dict[str, Dict[str, Any]]:
198
  "chat_mode": str(meta.get("chat_mode", "paper mode")),
199
  "avatar_path": str((d / avatar_rel).resolve()),
200
  "lecture_prompt_template": _read_text_if_exists(
201
- d / str(meta.get("lecture_prompt_file", "lecture_prompt.txt")),
202
  DEFAULT_LECTURE_PROMPT_TEMPLATE,
203
  ),
204
  "mcq_prompt_template": _read_text_if_exists(
205
- d / str(meta.get("mcq_prompt_file", "mcq_prompt.txt")),
206
  DEFAULT_MCQ_PROMPT_TEMPLATE,
207
  ),
208
  "mcq_retry_prompt_template": _read_text_if_exists(
209
- d / str(meta.get("mcq_retry_prompt_file", "mcq_retry_prompt.txt")),
210
  DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
211
  ),
 
 
 
 
 
 
 
 
212
  }
213
  configs[cid] = config
214
 
@@ -225,6 +238,7 @@ def load_character_configs() -> Dict[str, Dict[str, Any]]:
225
  "lecture_prompt_template": DEFAULT_LECTURE_PROMPT_TEMPLATE,
226
  "mcq_prompt_template": DEFAULT_MCQ_PROMPT_TEMPLATE,
227
  "mcq_retry_prompt_template": DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
 
228
  }
229
  return configs
230
 
@@ -234,6 +248,11 @@ DEFAULT_CHARACTER_ID = next(iter(CHARACTER_CONFIGS.keys()))
234
 
235
 
236
  def get_character_config(character_id: Optional[str]) -> Dict[str, Any]:
 
 
 
 
 
237
  if character_id and character_id in CHARACTER_CONFIGS:
238
  return CHARACTER_CONFIGS[character_id]
239
  return CHARACTER_CONFIGS[DEFAULT_CHARACTER_ID]
@@ -698,34 +717,34 @@ class QwenPipelineEngine:
698
  def _mock_generate_mcqs(self, lecture_text: str) -> List[MCQItem]:
699
  base_questions = [
700
  MCQItem(
701
- question="这篇论文最主要想解决的问题通常属于下列哪一类?",
702
- options=["现有方法存在性能或效率瓶颈", "如何设计数据库索引", "如何搭建前端页面", "如何压缩视频文件"],
703
  answer="A",
704
- explanation="课程论文阅读类任务通常围绕已有方法不足展开,作者提出新方法来提升性能、效率或鲁棒性。",
705
  ),
706
  MCQItem(
707
- question="在讲解论文方法时,最合理的组织方式是什么?",
708
- options=["按模块或步骤解释输入到输出流程", "只列出参考文献", "只展示实验表格不解释方法", "只讲结论不讲背景"],
709
  answer="A",
710
- explanation="课堂讲解需要结构化地说明方法流程,这样听众才能理解论文如何从问题走到解法。",
711
  ),
712
  MCQItem(
713
- question="生成选择题时,为什么需要同时给出答案和解析?",
714
- options=["便于交互反馈与纠错教学", "只是为了让 JSON 更长", "因为 Gradio 要求必须有解析", "为了减少题目数量"],
715
  answer="A",
716
- explanation="答案和解析是教学闭环的一部分,错误时给出解析能帮助用户理解常见误区。",
717
  ),
718
  MCQItem(
719
- question="如果论文很长,一次性输入模型的风险是什么?",
720
- options=["上下文超长导致成本高、信息丢失或失败", "模型会自动变得更准确", "TTS 音频会变短", "PDF 文件会损坏"],
721
  answer="A",
722
- explanation="长文档通常需要分块总结再汇总,避免超出上下文窗口并降低生成质量波动。",
723
  ),
724
  MCQItem(
725
- question="在这个 Demo 流程中,Qwen TTS 的作用是什么?",
726
- options=["把讲解与错题解析转成语音输出", " PDF 转成图片", "训练 Qwen3-VL-8B", "生成新的选择题答案"],
727
  answer="A",
728
- explanation="TTS 用于将文本讲解/解析语音化,提高交互演示效果和可访问性。",
729
  ),
730
  ]
731
  return base_questions
@@ -1007,7 +1026,7 @@ class QwenPipelineEngine:
1007
  )
1008
  try:
1009
  mcqs = parse_mcq_json(raw_mcq_json)
1010
- except json.JSONDecodeError:
1011
  retry_prompt = render_prompt_template(str(mcq_retry_template), pdf_excerpt)
1012
  retry_raw = self._real_generate_text_from_pdf(
1013
  pdf_path,
@@ -1029,6 +1048,27 @@ class QwenPipelineEngine:
1029
 
1030
 
1031
  def parse_mcq_json(raw: str) -> List[MCQItem]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1032
  cleaned = strip_code_fence(raw)
1033
  try:
1034
  payload = json.loads(cleaned)
@@ -1039,13 +1079,39 @@ def parse_mcq_json(raw: str) -> List[MCQItem]:
1039
  payload = json.loads(cleaned[start:end + 1])
1040
  else:
1041
  raise
1042
- questions = payload.get("questions", [])
 
 
 
1043
  parsed: List[MCQItem] = []
1044
  for item in questions[:5]:
 
 
1045
  q = str(item.get("question", "")).strip()
1046
- options = [normalize_option_text(x) for x in item.get("options", [])][:4]
1047
- answer = str(item.get("answer", "")).strip().upper()
1048
- explanation = str(item.get("explanation", "")).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
  if len(options) != 4:
1050
  continue
1051
  if answer not in {"A", "B", "C", "D"}:
@@ -1191,35 +1257,60 @@ def _pick_variant(items: List[str], seed: int) -> str:
1191
  return items[seed % len(items)]
1192
 
1193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1194
  def _examiner_style_prompt(character_id: str) -> str:
1195
- cid = (character_id or "").lower()
1196
- guardrails = (
1197
- "Do NOT invent magical facts. "
1198
- "You may use light Hogwarts classroom metaphors (e.g., 'like your Potions class' / marks / exam / detention), "
1199
- "but do NOT mention spells, incantations, wands, named artifacts, named potions, or made-up magical theory. "
1200
- "Do NOT mention Harry Potter or any specific character names. "
1201
- "Keep remarks grounded in the student's performance on this paper/exam."
1202
- )
1203
- if "snape" in cid:
1204
- return (
1205
- "You are Professor Severus Snape. Cold, cutting, impatient with sloppy thinking; no emojis; no stage directions. "
1206
- "Sound like Snape: sharp disdain, controlled cruelty, short jabs, second-person address ('you'). "
1207
- "Avoid generic academic HR language like 'fundamental lack of precision' or 'theoretical principles'. "
1208
- f"{guardrails} Be brief and exam-focused."
1209
- )
1210
- if "mcgonagall" in cid or "mcg" in cid:
1211
  return (
1212
- "You are Professor Minerva McGonagall. Crisp, strict, decisive; no emojis; no stage directions. "
1213
- "Sound like McGonagall: brisk, no-nonsense, disciplined; firm standards with a controlled, teacherly tone; second-person address ('you'). "
1214
- "Be less harsh than Snape: no contempt, no insults; correct firmly and encourage disciplined improvement. "
1215
- "Avoid academic report phrasing (e.g., 'demonstrates', 'fundamental', 'theoretical', 'principles', 'application'). "
1216
- "Prefer plain classroom language: 'That will not do', 'Pay attention', 'Be precise', 'Again', 'Good—carry on'. "
1217
- "Be pointed, practical, and supportive when appropriate. "
1218
- f"{guardrails} Be brief, firm, and exam-focused."
1219
  )
1220
  return (
1221
- "You are a strict examiner. Be brief, precise, and exam-focused. "
1222
- f"{guardrails} No emojis."
 
1223
  )
1224
 
1225
 
@@ -1232,56 +1323,12 @@ def _llm_short_exam_remark(character_id: str, *, kind: str, context: str = "") -
1232
  if engine.mock_mode:
1233
  return ""
1234
  ctx = " ".join(str(context or "").strip().split())
1235
- if ctx:
1236
- ctx = f"Context: {ctx}\n"
1237
- style_seed = uuid.uuid4().hex
1238
- cid = (character_id or "").lower()
1239
  if kind == "correct":
1240
- if "snape" in cid:
1241
- openers = ["Correct.", "Precisely.", "Good.", "Exactly.", "That's right.", "Adequate.", "Very well."]
1242
- elif "mcgonagall" in cid or "mcg" in cid:
1243
- openers = ["Good.", "Correct.", "Quite right.", "That's right.", "That's better.", "Well done.", "Exactly."]
1244
- else:
1245
- openers = ["That's right.", "That's correct.", "Correct.", "Exactly.", "Good.", "Well done."]
1246
- instruction = (
1247
- f"{ctx}"
1248
- f"Style seed (do not repeat it): {style_seed}\n"
1249
- "Write ONE short, in-character sentence reacting to a correct answer. "
1250
- "Start the sentence with ONE of these openers exactly: "
1251
- + ", ".join([f"'{o}'" for o in openers])
1252
- + ". "
1253
- "Choose the opener to maximize variety across runs. "
1254
- "Max 16 words. No markdown. No emojis. "
1255
- "Make it sound like the character, not a generic professor."
1256
- )
1257
  elif kind == "incorrect":
1258
- if "snape" in cid:
1259
- openers = ["Wrong!", "Wrong.", "No.", "Obviously not.", "Incorrect.", "Not even close."]
1260
- elif "mcgonagall" in cid or "mcg" in cid:
1261
- openers = ["No.", "Not quite.", "That will not do.", "Incorrect.", "Careful.", "Stop guessing."]
1262
- else:
1263
- openers = ["Wrong!", "Wrong.", "Not quite.", "No.", "Incorrect.", "That's wrong."]
1264
- instruction = (
1265
- f"{ctx}"
1266
- f"Style seed (do not repeat it): {style_seed}\n"
1267
- "Write ONE short, in-character sentence reacting to an incorrect answer. "
1268
- "Start the sentence with ONE of these openers exactly: "
1269
- + ", ".join([f"'{o}'" for o in openers])
1270
- + ". "
1271
- "Choose the opener to maximize variety across runs. "
1272
- "Do NOT mention the correct option letter. "
1273
- "Do NOT include the phrase 'The correct answer is'. "
1274
- "Max 20 words. No markdown. No emojis. "
1275
- "Make it sound like the character, not a generic professor."
1276
- )
1277
  else:
1278
- instruction = (
1279
- f"{ctx}"
1280
- f"Style seed (do not repeat it): {style_seed}\n"
1281
- "Write 1–2 short, in-character sentences as a final examiner remark, with ONE concrete revision instruction. "
1282
- "Max 28 words total. No markdown. No emojis. "
1283
- "Do not sound like a generic academic report."
1284
- )
1285
  text = _llm_exam_feedback(
1286
  [
1287
  {"role": "system", "content": _examiner_style_prompt(character_id)},
@@ -1298,20 +1345,20 @@ def exam_feedback_correct(character_id: str, *, q_index: int) -> str:
1298
  if "snape" in cid:
1299
  return _pick_variant(
1300
  [
1301
- "That's right. Try not to look so astonished; it is unbecoming.",
1302
- "Correct. At least you read something other than the title.",
1303
- "Precisely. Keep up—this is not a guessing game.",
1304
- "Good. Acceptable. Proceed before you ruin it.",
1305
  ],
1306
  q_index,
1307
  )
1308
  if "mcgonagall" in cid or "mcg" in cid:
1309
  return _pick_variant(
1310
  [
1311
- "That's correct. Good—do not get complacent.",
1312
- "Good. Sensible. Keep your notes straight and move on.",
1313
- "Well done. Stay focused; the next will not be kinder.",
1314
- "Precisely. Continue, and keep the standard.",
1315
  ],
1316
  q_index,
1317
  )
@@ -1342,10 +1389,10 @@ def exam_feedback_incorrect(
1342
  if "snape" in cid:
1343
  opener = _pick_variant(
1344
  [
1345
- "Wrong! Listen carefully—if you are capable of it.",
1346
- "Incorrect. Pay attention; your confidence is not evidence.",
1347
- "Wrong. This is what careless reading looks like in public.",
1348
- "Incorrect. I expected better discipline. Clearly, that was optimistic.",
1349
  ],
1350
  q_index,
1351
  )
@@ -1353,10 +1400,10 @@ def exam_feedback_incorrect(
1353
  if "mcgonagall" in cid or "mcg" in cid:
1354
  opener = _pick_variant(
1355
  [
1356
- "Incorrect. Think it through properly—do not guess.",
1357
- "Not quite. Slow down and read precisely; words matter.",
1358
- "Incorrect. You are guessing—stop it at once.",
1359
- "Not correct. Focus on the method, not the surface wording.",
1360
  ],
1361
  q_index,
1362
  )
@@ -1381,22 +1428,22 @@ def exam_feedback_final(character_id: str, *, score: int, total: int) -> str:
1381
  band = _score_band(score, total)
1382
  if "snape" in cid:
1383
  mapping = {
1384
- "excellent": "Excellent. For once, you have not wasted my time.",
1385
- "good": "Adequate. Do not mistake adequacy for insight.",
1386
- "fair": "Mediocre. You have work to do—start now, not later.",
1387
- "poor": "Disappointing. Guesswork is not scholarship; it is laziness.",
1388
- "none": "No score to judge—how convenient for you.",
1389
  }
1390
- return mapping.get(band, "Enough.")
1391
  if "mcgonagall" in cid or "mcg" in cid:
1392
  mapping = {
1393
- "excellent": "Excellent work. That is the standard I expect—keep it there.",
1394
- "good": "Good. Solid understanding—polish the details and stop rushing.",
1395
- "fair": "Passable, but uneven. Review the method carefully and be exact.",
1396
- "poor": "Not acceptable. Go back and study properly, then try again.",
1397
- "none": "No score to judge—start when you are ready to work seriously.",
1398
  }
1399
- return mapping.get(band, "Well.")
1400
  return f"Final score: {score} / {total}."
1401
  try:
1402
  remark = _llm_short_exam_remark(
@@ -1414,10 +1461,10 @@ def exam_feedback_final(character_id: str, *, score: int, total: int) -> str:
1414
  def _roleplay_explain_feedback(character_id: str) -> str:
1415
  cid = (character_id or "").lower()
1416
  if "snape" in cid:
1417
- return "Lecture is ready. If you insist, press ‘Play Lecture Audio’; then go to the exam and try not to disgrace yourself."
1418
  if "mcgonagall" in cid or "mcg" in cid:
1419
- return "Lecture is ready. Review it properly, then go to the exam when you are prepared to be examined."
1420
- return "Lecture is ready. Review it, then go to the exam when you are ready."
1421
 
1422
 
1423
  def _roleplay_loading_text(character_id: str, *, phase: str) -> str:
@@ -1428,13 +1475,13 @@ def _roleplay_loading_text(character_id: str, *, phase: str) -> str:
1428
  if "snape" in cid:
1429
  return f"Professor {name} is scrutinizing your paper…"
1430
  if "mcgonagall" in cid or "mcg" in cid:
1431
- return f"Professor {name} is reviewing your paper with unforgiving precision…"
1432
  return f"Professor {name} is reviewing your paper…"
1433
  if "snape" in cid:
1434
- return f"Professor {name} is preparing something unpleasantly rigorous…"
1435
  if "mcgonagall" in cid or "mcg" in cid:
1436
- return f"Professor {name} is preparing a properly challenging set of questions…"
1437
- return f"Professor {name} is preparing your materials…"
1438
 
1439
 
1440
  def build_loading_html(text: str) -> str:
@@ -1497,7 +1544,13 @@ def reset_ui_from_state(
1497
  submit_interactive = quiz_ready and not state.get("completed", False)
1498
  radio_interactive = submit_interactive
1499
  lecture_tts_ready = bool(state.get("lecture_text"))
1500
- selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
 
 
 
 
 
 
1501
  if state.get("completed"):
1502
  radio_interactive = False
1503
  return (
@@ -1512,7 +1565,7 @@ def reset_ui_from_state(
1512
  state.get("status", "Idle"),
1513
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1514
  gr.update(
1515
- choices=paragraph_picker_choices(state.get("lecture_text", "")),
1516
  value=selected_paragraph_value,
1517
  interactive=lecture_tts_ready,
1518
  visible=lecture_tts_ready,
@@ -1520,6 +1573,7 @@ def reset_ui_from_state(
1520
  state.get("lecture_audio_path", None),
1521
  gr.update(interactive=lecture_tts_ready),
1522
  gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
 
1523
  gr.update(visible=exam_picker_visible),
1524
  gr.update(value=build_exam_chat_html(state), visible=show_exam_page and (quiz_ready or bool(state.get("exam_chat")))),
1525
  gr.update(choices=current_choices(state), value=None, interactive=radio_interactive),
@@ -1538,10 +1592,10 @@ def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any
1538
  yield reset_ui_from_state(state, feedback="Upload a PDF to start.", results_visible=False, loading_visible=False)
1539
  return
1540
 
1541
- state["status"] = "正在生成中..."
1542
  yield reset_ui_from_state(
1543
  state,
1544
- feedback="正在读取论文并生成讲解与题目,请稍候...",
1545
  results_visible=False,
1546
  loading_visible=True,
1547
  loading_text=_roleplay_loading_text(state.get("character_id") or DEFAULT_CHARACTER_ID, phase="lecture"),
@@ -1572,7 +1626,7 @@ def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any
1572
  loading_visible=False,
1573
  )
1574
  except Exception as exc:
1575
- state["status"] = "Failed during generation."
1576
  state["lecture_text"] = f"Error: {type(exc).__name__}: {exc}"
1577
  state["current_page"] = "explain"
1578
  yield reset_ui_from_state(
@@ -1586,7 +1640,7 @@ def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any
1586
  def submit_answer(choice: Optional[str], state: Dict[str, Any]):
1587
  if not state.get("mcqs"):
1588
  state["status"] = "No quiz loaded."
1589
- return reset_ui_from_state(state, feedback="Load a PDF first.")
1590
  if state.get("completed"):
1591
  return reset_ui_from_state(state, feedback="Quiz already completed.")
1592
  if not choice:
@@ -1664,7 +1718,7 @@ def submit_answer(choice: Optional[str], state: Dict[str, Any]):
1664
 
1665
  def restart_quiz(state: Dict[str, Any]):
1666
  if not state.get("mcqs"):
1667
- return reset_ui_from_state(new_session_state(), feedback="Load a PDF first.")
1668
  state["current_index"] = 0
1669
  state["score"] = 0
1670
  state["awaiting_next_after_wrong"] = False
@@ -1713,7 +1767,7 @@ def generate_exam_mcq(selected_character_id: Optional[str], state: Dict[str, Any
1713
  yield reset_ui_from_state(state, feedback="Generate lecture first.", results_visible=False, loading_visible=False)
1714
  return
1715
  if not selected_character_id:
1716
- state["status"] = "Please select a character to generate MCQs."
1717
  yield reset_ui_from_state(state, feedback="", results_visible=True, loading_visible=False)
1718
  return
1719
 
@@ -1749,7 +1803,7 @@ def generate_exam_mcq(selected_character_id: Optional[str], state: Dict[str, Any
1749
  state["current_page"] = "exam"
1750
  state["mcq_generating"] = False
1751
  _ensure_current_question_in_exam_chat(state)
1752
- state["status"] = "Your exam is prepared."
1753
  yield reset_ui_from_state(
1754
  state,
1755
  feedback="",
@@ -1759,10 +1813,10 @@ def generate_exam_mcq(selected_character_id: Optional[str], state: Dict[str, Any
1759
  except Exception as exc:
1760
  state["current_page"] = "exam"
1761
  state["mcq_generating"] = False
1762
- state["status"] = "Failed during MCQ generation."
1763
  _append_exam_assistant_text(
1764
  state,
1765
- f"Failed to generate the exam.\nError: {type(exc).__name__}: {exc}",
1766
  kind="note",
1767
  )
1768
  yield reset_ui_from_state(
@@ -1839,18 +1893,18 @@ def play_lecture_audio(state: Dict[str, Any]):
1839
  backend = _tts_backend_name()
1840
  voice = tts_voice_for_character(state.get("character_id"))
1841
  try:
1842
- state["status"] = f"Generating lecture audio ({backend})..."
1843
  state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
1844
- state["status"] = "Lecture audio ready."
1845
  return (
1846
  state,
1847
  state["status"],
1848
  state["lecture_audio_path"],
1849
- f"Lecture audio generated via `{backend}`.",
1850
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1851
  )
1852
  except Exception as exc:
1853
- state["status"] = "Lecture audio generation failed."
1854
  return (
1855
  state,
1856
  state["status"],
@@ -1865,7 +1919,14 @@ def split_lecture_paragraphs(text: str) -> List[str]:
1865
  if not s:
1866
  return []
1867
  pieces = re.split(r"\n\s*\n+", s)
1868
- return [p.strip() for p in pieces if p and p.strip()]
 
 
 
 
 
 
 
1869
 
1870
 
1871
  def paragraph_picker_choices(lecture_text: str) -> List[tuple[str, str]]:
@@ -1879,6 +1940,29 @@ def paragraph_picker_choices(lecture_text: str) -> List[tuple[str, str]]:
1879
  return choices
1880
 
1881
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1882
  def build_clickable_lecture_html(lecture_text: str, selected_idx: str = "") -> str:
1883
  paragraphs = split_lecture_paragraphs(lecture_text)
1884
  if not paragraphs:
@@ -1897,9 +1981,9 @@ def build_clickable_lecture_html(lecture_text: str, selected_idx: str = "") -> s
1897
  else ""
1898
  )
1899
  parts.append(
1900
- f'<div class="lecture-paragraph{selected_cls}" data-idx="{i}" '
1901
- f'style="{selected_style}" '
1902
- f'onclick="window.__lectureSelectParagraph && window.__lectureSelectParagraph({i}, this, true);">{safe}</div>'
1903
  )
1904
  parts.append("</div>")
1905
  return "".join(parts)
@@ -1909,12 +1993,12 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1909
  lecture_text = state.get("lecture_text", "")
1910
  paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
1911
  if not paragraphs:
1912
- state["status"] = "暂无讲解内容。"
1913
  return (
1914
  state,
1915
  state.get("status", "Idle"),
1916
  state.get("lecture_audio_path"),
1917
- "请先生成讲解。",
1918
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1919
  )
1920
 
@@ -1923,12 +2007,12 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1923
  except Exception:
1924
  idx = -1
1925
  if idx < 0 or idx >= len(paragraphs):
1926
- state["status"] = "段落选择无效。"
1927
  return (
1928
  state,
1929
  state.get("status", "Idle"),
1930
  state.get("lecture_audio_path"),
1931
- "请重新点击要播放的段落。",
1932
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1933
  )
1934
 
@@ -1936,24 +2020,24 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1936
  voice = tts_voice_for_character(state.get("character_id"))
1937
  try:
1938
  state["selected_paragraph_idx"] = str(idx)
1939
- state["status"] = f"正在生成段落语音({backend}..."
1940
  audio_path = engine.synthesize_tts(
1941
  paragraphs[idx],
1942
  name_prefix=f"lecture_p{idx+1}",
1943
  voice=voice,
1944
  )
1945
  state["lecture_audio_path"] = audio_path
1946
- state["status"] = "段落语音已生成。"
1947
  char_len = len(paragraphs[idx])
1948
  return (
1949
  state,
1950
  state["status"],
1951
  audio_path,
1952
- f"已生成第 {idx+1}/{len(paragraphs)} 段语音({char_len} 字符),可在下方播放。",
1953
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1954
  )
1955
  except Exception as exc:
1956
- state["status"] = "段落语音生成失败。"
1957
  return (
1958
  state,
1959
  state["status"],
@@ -1966,8 +2050,8 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1966
  def play_explanation_audio(state: Dict[str, Any]):
1967
  text = state.get("last_explanation_tts_text", "")
1968
  if not text:
1969
- state["status"] = "No explanation available for TTS."
1970
- return state, state["status"], state.get("explanation_audio_path"), "Answer a question incorrectly first."
1971
  voice = tts_voice_for_character(state.get("exam_character_id") or state.get("character_id"))
1972
  try:
1973
  state["status"] = "Generating explanation audio..."
@@ -1981,27 +2065,54 @@ def play_explanation_audio(state: Dict[str, Any]):
1981
 
1982
  def on_play_lecture_audio_click(state: Dict[str, Any]):
1983
  state, status, audio_path, feedback, lecture_html = play_lecture_audio(state)
1984
- selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
 
 
 
 
 
 
 
 
1985
  return (
1986
  state,
1987
  status,
1988
  audio_path,
1989
  feedback,
1990
  lecture_html,
1991
- gr.update(value=selected_paragraph_value),
 
 
 
 
 
1992
  )
1993
 
1994
 
1995
  def on_play_paragraph_click(paragraph_idx: str, state: Dict[str, Any]):
1996
- state, status, audio_path, feedback, lecture_html = play_lecture_paragraph_audio(paragraph_idx, state)
1997
- selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
 
 
 
 
 
 
 
 
 
1998
  return (
1999
  state,
2000
  status,
2001
  audio_path,
2002
  feedback,
2003
  lecture_html,
2004
- gr.update(value=selected_paragraph_value),
 
 
 
 
 
2005
  )
2006
 
2007
 
@@ -2316,9 +2427,11 @@ body {{
2316
  #lecture-clickable .lecture-clickable,
2317
  #lecture-clickable .lecture-clickable * {{
2318
  pointer-events: auto !important;
 
 
2319
  }}
2320
  #lecture-clickable .lecture-paragraph {{
2321
- cursor: pointer;
2322
  pointer-events: auto !important;
2323
  padding: 10px 12px;
2324
  border-radius: 14px;
@@ -2329,6 +2442,10 @@ body {{
2329
  line-height: 1.45 !important;
2330
  color: rgba(244,246,251,0.95) !important;
2331
  }}
 
 
 
 
2332
  #lecture-clickable .lecture-paragraph:hover {{
2333
  background: rgba(255,255,255,0.08);
2334
  border-color: rgba(255,255,255,0.14);
@@ -2345,6 +2462,15 @@ body {{
2345
  box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
2346
  color: #ffffff !important;
2347
  }}
 
 
 
 
 
 
 
 
 
2348
  .lecture-empty {{
2349
  padding: 10px 12px;
2350
  color: rgba(244,246,251,0.72);
@@ -2378,6 +2504,63 @@ body {{
2378
  #play-paragraph-btn {{
2379
  display: none !important;
2380
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2381
  @keyframes tts_loading {{
2382
  100% {{ background-size: 110%; }}
2383
  }}
@@ -2563,6 +2746,9 @@ body {{
2563
  border-color: rgba(15, 23, 42, 0.10);
2564
  color: #0f172a !important;
2565
  }}
 
 
 
2566
  #lecture-clickable .lecture-paragraph:hover {{
2567
  background: rgba(15, 23, 42, 0.06);
2568
  border-color: rgba(15, 23, 42, 0.16);
@@ -2952,14 +3138,16 @@ with gr.Blocks(css=CSS) as demo:
2952
  const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
2953
  if (state.bound) return;
2954
  try {
2955
- const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
2956
- const rootCandidates = [
2957
- document,
2958
- grRoot && grRoot.shadowRoot ? grRoot.shadowRoot : null,
2959
- grRoot,
2960
- ].filter(Boolean);
 
 
2961
  const q = (sel) => {
2962
- for (const r of rootCandidates) {
2963
  const el = r.querySelector ? r.querySelector(sel) : null;
2964
  if (el) return el;
2965
  }
@@ -2987,8 +3175,8 @@ with gr.Blocks(css=CSS) as demo:
2987
  if (!audio) return;
2988
  if (audio.__ttsBound) return;
2989
  audio.__ttsBound = true;
2990
- audio.addEventListener("loadstart", () => showLoading("正在加载音频..."), true);
2991
- audio.addEventListener("waiting", () => showLoading("正在加载音频..."), true);
2992
  audio.addEventListener("canplay", () => hideLoading(), true);
2993
  audio.addEventListener("playing", () => hideLoading(), true);
2994
  audio.addEventListener("error", () => hideLoading(), true);
@@ -3006,7 +3194,12 @@ with gr.Blocks(css=CSS) as demo:
3006
  boxShadow: "0 0 0 1px rgba(255,255,255,0.16) inset",
3007
  color: "#ffffff",
3008
  };
3009
- for (const r of rootCandidates) {
 
 
 
 
 
3010
  const nodes = r.querySelectorAll ? r.querySelectorAll("#lecture-clickable .lecture-paragraph.is-selected") : [];
3011
  for (const node of nodes) {
3012
  node.classList.remove("is-selected");
@@ -3022,6 +3215,11 @@ with gr.Blocks(css=CSS) as demo:
3022
  if (para && para.classList) {
3023
  para.classList.add("is-selected");
3024
  para.setAttribute("data-selected", "1");
 
 
 
 
 
3025
  if (para.style) {
3026
  para.style.setProperty("background", selectedInlineStyle.background, "important");
3027
  para.style.setProperty("border-color", selectedInlineStyle.borderColor, "important");
@@ -3036,7 +3234,7 @@ with gr.Blocks(css=CSS) as demo:
3036
  input = inputWrap && inputWrap.querySelector ? inputWrap.querySelector("textarea, input") : null;
3037
  }
3038
  if (!input) {
3039
- showLoading("未找到段落选择控件,请刷新页面重试。");
3040
  return;
3041
  }
3042
  input.value = indexText;
@@ -3050,10 +3248,10 @@ with gr.Blocks(css=CSS) as demo:
3050
  if (innerBtn) btn = innerBtn;
3051
  }
3052
  if (!btn) {
3053
- showLoading("未找到段落播放控件,请刷新页面重试。");
3054
  return;
3055
  }
3056
- showLoading("正在生成语音...");
3057
  btn.click();
3058
  };
3059
  window.__lectureSelectParagraph = (idx, el, autoPlay = true) => {
@@ -3063,12 +3261,24 @@ with gr.Blocks(css=CSS) as demo:
3063
  const paragraphFromEvent = (e) => {
3064
  const target = e ? e.target : null;
3065
  if (target && target.nodeType === 1 && target.closest) {
 
 
 
 
 
 
 
 
3066
  const p = target.closest(".lecture-paragraph");
3067
  if (p) return p;
3068
  }
3069
  const path = (e && typeof e.composedPath === "function") ? e.composedPath() : [];
3070
  for (const n of path) {
3071
  if (n && n.classList && n.classList.contains("lecture-paragraph")) return n;
 
 
 
 
3072
  }
3073
  return null;
3074
  };
@@ -3076,26 +3286,51 @@ with gr.Blocks(css=CSS) as demo:
3076
  const onParagraphClick = (e) => {
3077
  const para = paragraphFromEvent(e);
3078
  if (!para) return;
 
 
 
 
3079
  const idx = para.getAttribute("data-idx");
3080
  if (typeof idx !== "string" || idx.trim() === "") return;
3081
  selectParagraph(idx, para, true);
3082
  };
 
 
 
 
 
 
 
 
 
 
 
3083
  const bindClickRoot = (root) => {
3084
  if (!root || !root.addEventListener) return;
3085
  if (root.__lectureClickBound) return;
3086
  root.__lectureClickBound = true;
3087
  root.addEventListener("click", onParagraphClick, true);
3088
  };
 
 
 
 
 
 
 
 
 
 
 
3089
 
3090
- for (const r of rootCandidates) bindClickRoot(r);
3091
  bindClickRoot(window);
 
3092
 
3093
  if (!state.rebindObserver) {
3094
  state.rebindObserver = new MutationObserver(() => {
3095
- const nextRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
3096
- for (const r of [document, nextRoot && nextRoot.shadowRoot ? nextRoot.shadowRoot : null, nextRoot]) {
3097
- bindClickRoot(r);
3098
- }
3099
  });
3100
  state.rebindObserver.observe(document.body, { childList: true, subtree: true });
3101
  }
@@ -3119,28 +3354,30 @@ with gr.Blocks(css=CSS) as demo:
3119
  build_clickable_lecture_html(""),
3120
  elem_id="lecture-clickable",
3121
  )
3122
- with gr.Row(elem_id="lecture-actions"):
3123
- play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
3124
- gr.Markdown("提示:可直接点击段落播放;若浏览器拦截点击,请使用下方 Chunk selector。", elem_id="paragraph-tts-tip")
 
 
 
 
 
 
 
3125
  paragraph_picker = gr.Radio(
3126
  choices=[],
3127
  value=None,
3128
  interactive=False,
3129
  visible=False,
3130
- label="Chunks (fallback selector)",
3131
  elem_id="paragraph-picker",
 
3132
  )
3133
- lecture_feedback = gr.Markdown("")
3134
- with gr.Row(elem_id="exam-entry-wrap"):
3135
- exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
3136
-
3137
- with gr.Column(elem_id="tts-wrap"):
3138
- lecture_audio = gr.Audio(label="Lecture TTS", type="filepath", elem_id="lecture-audio")
3139
- gr.HTML(
3140
- '<div id="tts-loading" aria-hidden="true" style="display:none"><div class="tts-loading-row"><div class="tts-loading-bar"></div><div class="tts-loading-text" id="tts-loading-text">正在加载音频...</div></div></div>',
3141
- )
3142
  paragraph_idx = gr.Textbox(value="", label="", show_label=False, elem_id="selected-paragraph")
3143
- play_paragraph_btn = gr.Button("Play paragraph", elem_id="play-paragraph-btn")
 
 
3144
 
3145
  with gr.Column(visible=False, elem_id="exam-picker-overlay") as exam_picker_overlay:
3146
  with gr.Column(elem_id="exam-picker-modal"):
@@ -3175,25 +3412,7 @@ with gr.Blocks(css=CSS) as demo:
3175
  requestAnimationFrame(doScroll);
3176
  setTimeout(doScroll, 50);
3177
  };
3178
- const disableGlobalBlockers = () => {
3179
- const nodes = document.querySelectorAll('.wrap.default, .wrap.center');
3180
- nodes.forEach((n) => {
3181
- const rect = n.getBoundingClientRect();
3182
- const nearFullScreen =
3183
- rect.width >= window.innerWidth - 4 &&
3184
- rect.height >= window.innerHeight - 4 &&
3185
- rect.left <= 2 &&
3186
- rect.top <= 2;
3187
- if (!nearFullScreen) return;
3188
- const cs = window.getComputedStyle(n);
3189
- if (cs.position !== 'fixed') return;
3190
- n.style.setProperty('display', 'none', 'important');
3191
- n.style.setProperty('pointer-events', 'none', 'important');
3192
- n.style.setProperty('background', 'transparent', 'important');
3193
- });
3194
- };
3195
  const ensure = () => {
3196
- disableGlobalBlockers();
3197
  const root = document.querySelector('#exam-chat');
3198
  const wrap = root ? root.querySelector('.exam-chat-wrap') : null;
3199
  if (!root || !wrap) return;
@@ -3248,6 +3467,7 @@ with gr.Blocks(css=CSS) as demo:
3248
  paragraph_picker,
3249
  lecture_audio,
3250
  play_lecture_btn,
 
3251
  exam_btn,
3252
  exam_picker_overlay,
3253
  exam_chat,
@@ -3283,7 +3503,7 @@ with gr.Blocks(css=CSS) as demo:
3283
  outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
3284
  show_progress="minimal",
3285
  )
3286
- paragraph_picker.change(
3287
  fn=on_play_paragraph_click,
3288
  inputs=[paragraph_picker, state],
3289
  outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
 
64
  API_KEY = os.getenv("API_KEY", "")
65
  USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
66
  USE_MOCK_TTS = os.getenv("USE_MOCK_TTS", "0") == "1"
67
+ CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "gpt-4.1")
68
  TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
69
  TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
70
  TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
 
94
 
95
 
96
  DEFAULT_LECTURE_PROMPT_TEMPLATE = """
97
+ You are a teaching assistant. Read the uploaded paper content and produce a clear lecture-style explanation in English:
98
+ 1. Explain the problem and background first;
99
+ 2. Explain the core method step by step / module by module;
100
+ 3. Summarize key experiments and highlights;
101
+ 4. End with limitations and suitable use cases;
102
+ 5. Keep it classroom-friendly (about 400-700 words).
103
+
104
+ Paper content (may be excerpted):
105
  {document}
106
  """.strip()
107
 
108
 
109
  DEFAULT_MCQ_PROMPT_TEMPLATE = """
110
+ Based on the paper content below, generate 5 English single-choice MCQs for a classroom quiz.
111
+ Output strict JSON only (no markdown code block), in this format:
112
  {{
113
  "questions": [
114
  {{
115
  "question": "...",
116
+ "options": ["Option A", "Option B", "Option C", "Option D"],
117
  "answer": "A",
118
  "explanation": "..."
119
  }}
120
  ]
121
  }}
122
 
123
+ Requirements:
124
+ 1. Exactly 5 questions;
125
+ 2. 4 options per question;
126
+ 3. `answer` must be one of A/B/C/D;
127
+ 4. Explanation should tell why it is correct and common mistakes;
128
+ 5. Cover background, method, experiments/results, and limitations.
129
 
130
+ Paper content (may be excerpted):
131
  {document}
132
  """.strip()
133
 
134
 
135
  DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE = """
136
+ Generate 5 English single-choice MCQs from the following paper content.
137
+ Output valid JSON only. No explanation outside JSON, no markdown.
138
 
139
+ Constraints:
140
+ 1. Compact JSON (single line is fine);
141
+ 2. Exactly 5 questions;
142
+ 3. Each question includes `question`, `options` (4 items), `answer` (A/B/C/D), `explanation`;
143
+ 4. Keep explanations short (1-2 sentences);
144
+ 5. If uncertain, still generate based on the paper content only.
145
 
146
+ Output format:
147
  {{"questions":[{{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}}]}}
148
 
149
+ Paper content:
150
  {document}
151
  """.strip()
152
 
 
190
  if cid in configs:
191
  cid = d.name
192
  avatar_rel = str(meta.get("avatar", "avatar.jpg"))
193
+ lecture_prompt_path = d / "lecture_prompt.txt"
194
+ mcq_prompt_path = d / "mcq_prompt.txt"
195
+ mcq_retry_prompt_path = d / "mcq_retry_prompt.txt"
196
+ feedback_prompt_path = d / "feedback.txt"
197
  config: Dict[str, Any] = {
198
  "id": cid,
199
  "display_name": str(meta.get("display_name", d.name)),
 
203
  "chat_mode": str(meta.get("chat_mode", "paper mode")),
204
  "avatar_path": str((d / avatar_rel).resolve()),
205
  "lecture_prompt_template": _read_text_if_exists(
206
+ lecture_prompt_path,
207
  DEFAULT_LECTURE_PROMPT_TEMPLATE,
208
  ),
209
  "mcq_prompt_template": _read_text_if_exists(
210
+ mcq_prompt_path,
211
  DEFAULT_MCQ_PROMPT_TEMPLATE,
212
  ),
213
  "mcq_retry_prompt_template": _read_text_if_exists(
214
+ mcq_retry_prompt_path,
215
  DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
216
  ),
217
+ "feedback_prompt_template": _read_text_if_exists(
218
+ feedback_prompt_path,
219
+ "",
220
+ ),
221
+ "lecture_prompt_path": str(lecture_prompt_path.resolve()),
222
+ "mcq_prompt_path": str(mcq_prompt_path.resolve()),
223
+ "mcq_retry_prompt_path": str(mcq_retry_prompt_path.resolve()),
224
+ "feedback_prompt_path": str(feedback_prompt_path.resolve()),
225
  }
226
  configs[cid] = config
227
 
 
238
  "lecture_prompt_template": DEFAULT_LECTURE_PROMPT_TEMPLATE,
239
  "mcq_prompt_template": DEFAULT_MCQ_PROMPT_TEMPLATE,
240
  "mcq_retry_prompt_template": DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
241
+ "feedback_prompt_template": "",
242
  }
243
  return configs
244
 
 
248
 
249
 
250
def get_character_config(character_id: Optional[str]) -> Dict[str, Any]:
    """Return the configuration dict for ``character_id``.

    The character configs are re-read via ``load_character_configs()`` on
    every call so that edits to prompt files (e.g. ``mcq_prompt.txt``) are
    picked up without restarting the app. If the stored default id is no
    longer present after the reload, the first loaded id becomes the new
    default. An empty or unknown ``character_id`` resolves to the default
    character's config.
    """
    global CHARACTER_CONFIGS, DEFAULT_CHARACTER_ID

    # Refresh the module-level cache from disk before every lookup.
    CHARACTER_CONFIGS = load_character_configs()
    if DEFAULT_CHARACTER_ID not in CHARACTER_CONFIGS:
        DEFAULT_CHARACTER_ID = next(iter(CHARACTER_CONFIGS))

    known = bool(character_id) and character_id in CHARACTER_CONFIGS
    resolved_id = character_id if known else DEFAULT_CHARACTER_ID
    return CHARACTER_CONFIGS[resolved_id]
 
717
  def _mock_generate_mcqs(self, lecture_text: str) -> List[MCQItem]:
718
  base_questions = [
719
  MCQItem(
720
+ question="What type of core problem does this paper most likely address?",
721
+ options=["Performance or efficiency bottlenecks in existing methods", "How to design database indexes", "How to build a frontend page", "How to compress video files"],
722
  answer="A",
723
+ explanation="Paper-reading tasks usually focus on limitations of prior methods, then propose improvements in performance, efficiency, or robustness.",
724
  ),
725
  MCQItem(
726
+ question="What is the best way to explain a paper's method?",
727
+ options=["Explain the pipeline from input to output by modules or steps", "Only list references", "Only show experiment tables without method details", "Only present conclusions without background"],
728
  answer="A",
729
+ explanation="A structured, step-by-step explanation helps learners understand how the paper moves from problem to solution.",
730
  ),
731
  MCQItem(
732
+ question="Why provide both answers and explanations in MCQs?",
733
+ options=["To enable feedback and error correction", "Only to make JSON longer", "Because Gradio requires explanations", "To reduce the number of questions"],
734
  answer="A",
735
+ explanation="Answer + explanation completes the teaching loop and helps users learn from mistakes.",
736
  ),
737
  MCQItem(
738
+ question="What is the risk of feeding a very long paper in one shot?",
739
+ options=["Context overflow can increase cost and cause information loss or failure", "The model automatically becomes more accurate", "TTS audio becomes shorter", "The PDF file gets corrupted"],
740
  answer="A",
741
+ explanation="Long documents usually need chunking and summarization to avoid context-window issues and quality degradation.",
742
  ),
743
  MCQItem(
744
+ question="In this demo pipeline, what is Qwen TTS used for?",
745
+ options=["Convert lecture text and explanations into audio", "Convert PDF to images", "Train Qwen3-VL-8B", "Generate new MCQ answers"],
746
  answer="A",
747
+ explanation="TTS turns text explanations into speech, improving interactivity and accessibility.",
748
  ),
749
  ]
750
  return base_questions
 
1026
  )
1027
  try:
1028
  mcqs = parse_mcq_json(raw_mcq_json)
1029
+ except (json.JSONDecodeError, ValueError):
1030
  retry_prompt = render_prompt_template(str(mcq_retry_template), pdf_excerpt)
1031
  retry_raw = self._real_generate_text_from_pdf(
1032
  pdf_path,
 
1048
 
1049
 
1050
  def parse_mcq_json(raw: str) -> List[MCQItem]:
1051
+ def _normalize_answer_label(answer_raw: Any, options: List[str]) -> str:
1052
+ s = str(answer_raw or "").strip()
1053
+ if not s:
1054
+ return ""
1055
+ up = s.upper()
1056
+ if up in {"A", "B", "C", "D"}:
1057
+ return up
1058
+ m = re.search(r"\b([ABCD])\b", up)
1059
+ if m:
1060
+ return m.group(1)
1061
+ if up.startswith("OPTION "):
1062
+ tail = up.replace("OPTION ", "", 1).strip()
1063
+ if tail in {"A", "B", "C", "D"}:
1064
+ return tail
1065
+ normalized_answer_text = normalize_option_text(s).strip().lower()
1066
+ if normalized_answer_text:
1067
+ for i, opt in enumerate(options[:4]):
1068
+ if normalized_answer_text == normalize_option_text(opt).strip().lower():
1069
+ return ["A", "B", "C", "D"][i]
1070
+ return ""
1071
+
1072
  cleaned = strip_code_fence(raw)
1073
  try:
1074
  payload = json.loads(cleaned)
 
1079
  payload = json.loads(cleaned[start:end + 1])
1080
  else:
1081
  raise
1082
+ if isinstance(payload, list):
1083
+ questions = payload
1084
+ else:
1085
+ questions = payload.get("questions", []) or payload.get("items", []) or payload.get("data", [])
1086
  parsed: List[MCQItem] = []
1087
  for item in questions[:5]:
1088
+ if not isinstance(item, dict):
1089
+ continue
1090
  q = str(item.get("question", "")).strip()
1091
+ options_raw = item.get("options", [])
1092
+ if not isinstance(options_raw, list):
1093
+ options_raw = item.get("choices", []) if isinstance(item.get("choices", []), list) else []
1094
+ options = [normalize_option_text(x) for x in options_raw][:4]
1095
+ explanation = str(
1096
+ item.get("explanation", "")
1097
+ or item.get("rationale", "")
1098
+ or item.get("reason", "")
1099
+ ).strip()
1100
+ answer = _normalize_answer_label(
1101
+ item.get("answer", "")
1102
+ or item.get("correct_answer", "")
1103
+ or item.get("correctOption", "")
1104
+ or item.get("correct", ""),
1105
+ options,
1106
+ )
1107
+ if not answer:
1108
+ idx_value = item.get("answer_index", item.get("correct_index", None))
1109
+ try:
1110
+ idx = int(idx_value)
1111
+ if 0 <= idx < 4:
1112
+ answer = ["A", "B", "C", "D"][idx]
1113
+ except Exception:
1114
+ pass
1115
  if len(options) != 4:
1116
  continue
1117
  if answer not in {"A", "B", "C", "D"}:
 
1257
  return items[seed % len(items)]
1258
 
1259
 
1260
def _character_feedback_style_from_mcq_prompt(character_id: str) -> str:
    """Derive a short persona description from a character's MCQ prompt template.

    Scans the ``mcq_prompt_template`` of the character's config for:
      * the first line starting with "You are " (the role), and
      * the tone, taken either from the text following "Tone:" on the same
        line, or, when "Tone:" stands alone, from the next non-empty line
        unless that line is itself a section header (ends with ":").

    Returns the role (without trailing period) and ``Tone: <tone>`` joined as
    separate sentences, or "" when the template is empty or has neither.
    """
    cfg = get_character_config(character_id)
    prompt_text = str(cfg.get("mcq_prompt_template", "") or "")
    if not prompt_text.strip():
        return ""

    role_line = ""
    tone_line = ""
    in_tone_block = False
    for raw in prompt_text.splitlines():
        line = raw.strip()
        if not line:
            continue
        lower = line.lower()
        if not role_line and lower.startswith("you are "):
            role_line = line
            continue
        if lower.startswith("tone:"):
            # "Tone: strict and dry" carries the value inline; only a bare
            # "Tone:" header defers to the following line.
            inline_tone = line[len("tone:"):].strip()
            if inline_tone:
                tone_line = inline_tone
                in_tone_block = False
            else:
                in_tone_block = True
            continue
        if in_tone_block:
            in_tone_block = False
            # A line ending in ":" starts another section, so the tone block was empty.
            if not line.endswith(":"):
                tone_line = line

    style_parts: List[str] = []
    if role_line:
        style_parts.append(role_line.rstrip("."))
    if tone_line:
        style_parts.append(f"Tone: {tone_line}")
    # The role's period was stripped above, so restore the sentence break here.
    return ". ".join(style_parts).strip()
1294
+
1295
+
1296
def _examiner_style_prompt(character_id: str) -> str:
    """Build the system prompt used for live exam feedback.

    Preference order:
      1. The character's own ``feedback_prompt_template`` if it is non-empty.
      2. A persona derived from the character's MCQ prompt, followed by
         generic in-character feedback instructions.
      3. A neutral examiner prompt.
    """
    cfg = get_character_config(character_id)
    feedback_prompt = str(cfg.get("feedback_prompt_template", "") or "").strip()
    if feedback_prompt:
        return feedback_prompt

    character_style = _character_feedback_style_from_mcq_prompt(character_id)
    if character_style:
        # The derived style may already end with a period (e.g. from the tone
        # line); strip it so the separator below does not produce "..".
        persona = character_style.rstrip(". ")
        return (
            f"{persona}. "
            "You are giving live exam feedback after each answer. "
            "Respond in concise English, in-character, practical, and pointed. "
            "No markdown, no emojis, no stage directions."
        )
    return (
        "You are an examiner giving live feedback after each answer. "
        "Respond in concise English and focus on the student's performance. "
        "No markdown, no emojis."
    )
1315
 
1316
 
 
1323
  if engine.mock_mode:
1324
  return ""
1325
  ctx = " ".join(str(context or "").strip().split())
 
 
 
 
1326
  if kind == "correct":
1327
+ instruction = f"Write ONE short English sentence for a correct answer. Context: {ctx}. Max 16 words. No markdown. No emojis."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1328
  elif kind == "incorrect":
1329
+ instruction = f"Write ONE short English sentence for an incorrect answer without giving the option letter. Context: {ctx}. Max 20 words. No markdown. No emojis."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1330
  else:
1331
+ instruction = f"Write 1-2 short English final remarks with one concrete revision suggestion. Context: {ctx}. Max 28 words total. No markdown. No emojis."
 
 
 
 
 
 
1332
  text = _llm_exam_feedback(
1333
  [
1334
  {"role": "system", "content": _examiner_style_prompt(character_id)},
 
1345
  if "snape" in cid:
1346
  return _pick_variant(
1347
  [
1348
+ "Correct. Keep going.",
1349
+ "Right answer. Stay focused.",
1350
+ "Good. Next question.",
1351
+ "Exactly. Keep your pace.",
1352
  ],
1353
  q_index,
1354
  )
1355
  if "mcgonagall" in cid or "mcg" in cid:
1356
  return _pick_variant(
1357
  [
1358
+ "That's correct. Keep it up.",
1359
+ "Good work. Move on.",
1360
+ "Well done. Stay consistent.",
1361
+ "Precisely. Continue.",
1362
  ],
1363
  q_index,
1364
  )
 
1389
  if "snape" in cid:
1390
  opener = _pick_variant(
1391
  [
1392
+ "Wrong. Read more carefully.",
1393
+ "Incorrect. Check the prompt details.",
1394
+ "Not correct. Your reading is too loose.",
1395
+ "Incorrect. Be more rigorous.",
1396
  ],
1397
  q_index,
1398
  )
 
1400
  if "mcgonagall" in cid or "mcg" in cid:
1401
  opener = _pick_variant(
1402
  [
1403
+ "Incorrect. Think first, then answer.",
1404
+ "Not quite. Slow down and read precisely.",
1405
+ "Wrong. Stop guessing.",
1406
+ "Incorrect. Focus on the method itself.",
1407
  ],
1408
  q_index,
1409
  )
 
1428
  band = _score_band(score, total)
1429
  if "snape" in cid:
1430
  mapping = {
1431
+ "excellent": "Excellent performance this time.",
1432
+ "good": "Good. Keep polishing details.",
1433
+ "fair": "Fair. More practice is needed.",
1434
+ "poor": "Poor. Review the lecture and retry.",
1435
+ "none": "No score available yet.",
1436
  }
1437
+ return mapping.get(band, "Quiz finished.")
1438
  if "mcgonagall" in cid or "mcg" in cid:
1439
  mapping = {
1440
+ "excellent": "Excellent. Keep this standard.",
1441
+ "good": "Good understanding. Improve the details.",
1442
+ "fair": "Passable, but not stable yet.",
1443
+ "poor": "Not acceptable. Review and try again.",
1444
+ "none": "No score available yet.",
1445
  }
1446
+ return mapping.get(band, "Quiz finished.")
1447
  return f"Final score: {score} / {total}."
1448
  try:
1449
  remark = _llm_short_exam_remark(
 
1461
  def _roleplay_explain_feedback(character_id: str) -> str:
1462
  cid = (character_id or "").lower()
1463
  if "snape" in cid:
1464
+ return "Lecture is ready. Select a chunk to play, then go to the exam."
1465
  if "mcgonagall" in cid or "mcg" in cid:
1466
+ return "Lecture is ready. Review it carefully, then enter the exam."
1467
+ return "Lecture is ready. Review it, then enter the exam."
1468
 
1469
 
1470
  def _roleplay_loading_text(character_id: str, *, phase: str) -> str:
 
1475
  if "snape" in cid:
1476
  return f"Professor {name} is scrutinizing your paper…"
1477
  if "mcgonagall" in cid or "mcg" in cid:
1478
+ return f"Professor {name} is reviewing your paper with strict precision…"
1479
  return f"Professor {name} is reviewing your paper…"
1480
  if "snape" in cid:
1481
+ return f"Professor {name} is preparing a rigorous exam…"
1482
  if "mcgonagall" in cid or "mcg" in cid:
1483
+ return f"Professor {name} is preparing challenging questions…"
1484
+ return f"Professor {name} is preparing your exam materials…"
1485
 
1486
 
1487
  def build_loading_html(text: str) -> str:
 
1544
  submit_interactive = quiz_ready and not state.get("completed", False)
1545
  radio_interactive = submit_interactive
1546
  lecture_tts_ready = bool(state.get("lecture_text"))
1547
+ picker_choices = paragraph_picker_choices(state.get("lecture_text", ""))
1548
+ selected_paragraph_value = paragraph_picker_value_for_idx(
1549
+ state.get("lecture_text", ""),
1550
+ str(state.get("selected_paragraph_idx", "")).strip(),
1551
+ )
1552
+ if selected_paragraph_value is None and picker_choices:
1553
+ selected_paragraph_value = picker_choices[0][1]
1554
  if state.get("completed"):
1555
  radio_interactive = False
1556
  return (
 
1565
  state.get("status", "Idle"),
1566
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1567
  gr.update(
1568
+ choices=picker_choices,
1569
  value=selected_paragraph_value,
1570
  interactive=lecture_tts_ready,
1571
  visible=lecture_tts_ready,
 
1573
  state.get("lecture_audio_path", None),
1574
  gr.update(interactive=lecture_tts_ready),
1575
  gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
1576
+ gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
1577
  gr.update(visible=exam_picker_visible),
1578
  gr.update(value=build_exam_chat_html(state), visible=show_exam_page and (quiz_ready or bool(state.get("exam_chat")))),
1579
  gr.update(choices=current_choices(state), value=None, interactive=radio_interactive),
 
1592
  yield reset_ui_from_state(state, feedback="Upload a PDF to start.", results_visible=False, loading_visible=False)
1593
  return
1594
 
1595
+ state["status"] = "Generating..."
1596
  yield reset_ui_from_state(
1597
  state,
1598
+ feedback="Reading the paper and generating lecture/quiz content...",
1599
  results_visible=False,
1600
  loading_visible=True,
1601
  loading_text=_roleplay_loading_text(state.get("character_id") or DEFAULT_CHARACTER_ID, phase="lecture"),
 
1626
  loading_visible=False,
1627
  )
1628
  except Exception as exc:
1629
+ state["status"] = "Generation failed."
1630
  state["lecture_text"] = f"Error: {type(exc).__name__}: {exc}"
1631
  state["current_page"] = "explain"
1632
  yield reset_ui_from_state(
 
1640
  def submit_answer(choice: Optional[str], state: Dict[str, Any]):
1641
  if not state.get("mcqs"):
1642
  state["status"] = "No quiz loaded."
1643
+ return reset_ui_from_state(state, feedback="Upload a PDF and generate lecture first.")
1644
  if state.get("completed"):
1645
  return reset_ui_from_state(state, feedback="Quiz already completed.")
1646
  if not choice:
 
1718
 
1719
  def restart_quiz(state: Dict[str, Any]):
1720
  if not state.get("mcqs"):
1721
+ return reset_ui_from_state(new_session_state(), feedback="Upload a PDF and generate lecture first.")
1722
  state["current_index"] = 0
1723
  state["score"] = 0
1724
  state["awaiting_next_after_wrong"] = False
 
1767
  yield reset_ui_from_state(state, feedback="Generate lecture first.", results_visible=False, loading_visible=False)
1768
  return
1769
  if not selected_character_id:
1770
+ state["status"] = "Please choose an examiner."
1771
  yield reset_ui_from_state(state, feedback="", results_visible=True, loading_visible=False)
1772
  return
1773
 
 
1803
  state["current_page"] = "exam"
1804
  state["mcq_generating"] = False
1805
  _ensure_current_question_in_exam_chat(state)
1806
+ state["status"] = "Exam prepared."
1807
  yield reset_ui_from_state(
1808
  state,
1809
  feedback="",
 
1813
  except Exception as exc:
1814
  state["current_page"] = "exam"
1815
  state["mcq_generating"] = False
1816
+ state["status"] = "Exam generation failed."
1817
  _append_exam_assistant_text(
1818
  state,
1819
+ f"Failed to generate exam.\nError: {type(exc).__name__}: {exc}",
1820
  kind="note",
1821
  )
1822
  yield reset_ui_from_state(
 
1893
  backend = _tts_backend_name()
1894
  voice = tts_voice_for_character(state.get("character_id"))
1895
  try:
1896
+ state["status"] = f"Generating full lecture audio ({backend})..."
1897
  state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
1898
+ state["status"] = "Full lecture audio ready."
1899
  return (
1900
  state,
1901
  state["status"],
1902
  state["lecture_audio_path"],
1903
+ f"Full lecture audio generated via `{backend}`.",
1904
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1905
  )
1906
  except Exception as exc:
1907
+ state["status"] = "Full lecture audio generation failed."
1908
  return (
1909
  state,
1910
  state["status"],
 
1919
  if not s:
1920
  return []
1921
  pieces = re.split(r"\n\s*\n+", s)
1922
+ paragraphs = [p.strip() for p in pieces if p and p.strip()]
1923
+ # If the model outputs a single giant paragraph, fall back to sentence-pair chunks
1924
+ # so the selector always has usable granularity for TTS.
1925
+ if len(paragraphs) <= 1:
1926
+ fallback_chunks = split_text_every_two_sentences(s, max_len=420)
1927
+ if len(fallback_chunks) > 1:
1928
+ return [c.strip() for c in fallback_chunks if c and c.strip()]
1929
+ return paragraphs
1930
 
1931
 
1932
  def paragraph_picker_choices(lecture_text: str) -> List[tuple[str, str]]:
 
1940
  return choices
1941
 
1942
 
1943
def paragraph_picker_idx_from_value(value: Any) -> str:
    """Convert a picker/textbox value into a zero-based chunk index string.

    Accepted inputs:
      * a bare number (``"2"`` or ``2``): already a zero-based index,
        returned unchanged as a string;
      * a one-based display label such as ``"3. Intro"`` or ``"3:"``:
        converted to the zero-based index (``"2"``), clamped at 0.

    Returns "" for ``None``, empty, or unrecognised values.
    """
    # Compare against None explicitly: `value or ""` would discard the
    # integer 0, which is the valid index of the first chunk.
    s = "" if value is None else str(value).strip()
    if not s:
        return ""
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so values like "²" are not passed through as indices.
    if s.isdecimal():
        return s
    m = re.match(r"^\s*(\d+)\s*[\.、::-]", s)
    if not m:
        return ""
    return str(max(0, int(m.group(1)) - 1))
1953
+
1954
+
1955
def paragraph_picker_value_for_idx(lecture_text: str, idx: str) -> Optional[str]:
    """Return the picker value for chunk ``idx``, or ``None`` if it is invalid.

    ``idx`` is parsed as an integer; the result is that integer rendered as a
    string when it addresses an existing chunk of ``lecture_text`` (as split by
    ``split_lecture_paragraphs``), otherwise ``None``.
    """
    raw_idx = str(idx or "").strip()
    try:
        position = int(raw_idx)
    except Exception:
        return None
    chunk_count = len(split_lecture_paragraphs(lecture_text))
    return str(position) if 0 <= position < chunk_count else None
1964
+
1965
+
1966
  def build_clickable_lecture_html(lecture_text: str, selected_idx: str = "") -> str:
1967
  paragraphs = split_lecture_paragraphs(lecture_text)
1968
  if not paragraphs:
 
1981
  else ""
1982
  )
1983
  parts.append(
1984
+ f'<div class="lecture-paragraph{selected_cls}" data-idx="{i}" style="{selected_style}">'
1985
+ f'<div class="chunk-text">{safe}</div>'
1986
+ f'</div>'
1987
  )
1988
  parts.append("</div>")
1989
  return "".join(parts)
 
1993
  lecture_text = state.get("lecture_text", "")
1994
  paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
1995
  if not paragraphs:
1996
+ state["status"] = "No lecture content available."
1997
  return (
1998
  state,
1999
  state.get("status", "Idle"),
2000
  state.get("lecture_audio_path"),
2001
+ "Generate lecture first.",
2002
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
2003
  )
2004
 
 
2007
  except Exception:
2008
  idx = -1
2009
  if idx < 0 or idx >= len(paragraphs):
2010
+ state["status"] = "Invalid chunk selection."
2011
  return (
2012
  state,
2013
  state.get("status", "Idle"),
2014
  state.get("lecture_audio_path"),
2015
+ "Please select a valid chunk.",
2016
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
2017
  )
2018
 
 
2020
  voice = tts_voice_for_character(state.get("character_id"))
2021
  try:
2022
  state["selected_paragraph_idx"] = str(idx)
2023
+ state["status"] = f"Generating chunk audio ({backend})..."
2024
  audio_path = engine.synthesize_tts(
2025
  paragraphs[idx],
2026
  name_prefix=f"lecture_p{idx+1}",
2027
  voice=voice,
2028
  )
2029
  state["lecture_audio_path"] = audio_path
2030
+ state["status"] = "Chunk audio ready."
2031
  char_len = len(paragraphs[idx])
2032
  return (
2033
  state,
2034
  state["status"],
2035
  audio_path,
2036
+ f"Generated chunk {idx+1}/{len(paragraphs)} ({char_len} chars). You can play it below.",
2037
  build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
2038
  )
2039
  except Exception as exc:
2040
+ state["status"] = "Chunk audio generation failed."
2041
  return (
2042
  state,
2043
  state["status"],
 
2050
  def play_explanation_audio(state: Dict[str, Any]):
2051
  text = state.get("last_explanation_tts_text", "")
2052
  if not text:
2053
+ state["status"] = "No explanation available for audio."
2054
+ return state, state["status"], state.get("explanation_audio_path"), "Answer a question first."
2055
  voice = tts_voice_for_character(state.get("exam_character_id") or state.get("character_id"))
2056
  try:
2057
  state["status"] = "Generating explanation audio..."
 
2065
 
2066
def _paragraph_picker_update(state: Dict[str, Any]):
    """Build the ``gr.update`` that re-syncs the chunk picker with ``state``.

    Choices are rebuilt from the current lecture text. The selected value
    follows ``state["selected_paragraph_idx"]`` when it is valid, otherwise it
    falls back to the first choice. The picker is shown and enabled only when
    lecture text exists.
    """
    lecture_text = state.get("lecture_text", "")
    picker_choices = paragraph_picker_choices(lecture_text)
    selected_paragraph_value = paragraph_picker_value_for_idx(
        lecture_text,
        str(state.get("selected_paragraph_idx", "")).strip(),
    )
    if selected_paragraph_value is None and picker_choices:
        # Each choice is a (label, value) pair; default to the first value.
        selected_paragraph_value = picker_choices[0][1]
    lecture_tts_ready = bool(lecture_text)
    return gr.update(
        choices=picker_choices,
        value=selected_paragraph_value,
        interactive=lecture_tts_ready,
        visible=lecture_tts_ready,
    )


def on_play_lecture_audio_click(state: Dict[str, Any]):
    """UI handler: synthesize audio for the whole lecture.

    Returns the five outputs of ``play_lecture_audio`` followed by a picker
    update, matching the handler's six Gradio outputs.
    """
    state, status, audio_path, feedback, lecture_html = play_lecture_audio(state)
    return (
        state,
        status,
        audio_path,
        feedback,
        lecture_html,
        _paragraph_picker_update(state),
    )


def on_play_paragraph_click(paragraph_idx: str, state: Dict[str, Any]):
    """UI handler: synthesize audio for one selected chunk.

    ``paragraph_idx`` may be a raw index or a picker label; it is normalised
    with ``paragraph_picker_idx_from_value`` before playback. Returns the five
    outputs of ``play_lecture_paragraph_audio`` followed by a picker update.
    """
    idx_value = paragraph_picker_idx_from_value(paragraph_idx)
    state, status, audio_path, feedback, lecture_html = play_lecture_paragraph_audio(idx_value, state)
    return (
        state,
        status,
        audio_path,
        feedback,
        lecture_html,
        _paragraph_picker_update(state),
    )
2117
 
2118
 
 
2427
  #lecture-clickable .lecture-clickable,
2428
  #lecture-clickable .lecture-clickable * {{
2429
  pointer-events: auto !important;
2430
+ opacity: 1 !important;
2431
+ filter: none !important;
2432
  }}
2433
  #lecture-clickable .lecture-paragraph {{
2434
+ cursor: default;
2435
  pointer-events: auto !important;
2436
  padding: 10px 12px;
2437
  border-radius: 14px;
 
2442
  line-height: 1.45 !important;
2443
  color: rgba(244,246,251,0.95) !important;
2444
  }}
2445
+ #lecture-clickable .chunk-text {{
2446
+ flex: 1 1 auto;
2447
+ min-width: 0;
2448
+ }}
2449
  #lecture-clickable .lecture-paragraph:hover {{
2450
  background: rgba(255,255,255,0.08);
2451
  border-color: rgba(255,255,255,0.14);
 
2462
  box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
2463
  color: #ffffff !important;
2464
  }}
2465
+ #lecture-wrap [disabled],
2466
+ #lecture-wrap [aria-disabled="true"],
2467
+ #lecture-wrap .disabled,
2468
+ #lecture-wrap .pending,
2469
+ #lecture-wrap .loading,
2470
+ #lecture-wrap .generating {{
2471
+ opacity: 1 !important;
2472
+ filter: none !important;
2473
+ }}
2474
  .lecture-empty {{
2475
  padding: 10px 12px;
2476
  color: rgba(244,246,251,0.72);
 
2504
  #play-paragraph-btn {{
2505
  display: none !important;
2506
  }}
2507
+ #chunk-controls {{
2508
+ margin-top: 8px !important;
2509
+ align-items: start !important;
2510
+ gap: 8px !important;
2511
+ overflow: visible !important;
2512
+ position: relative !important;
2513
+ z-index: 60 !important;
2514
+ }}
2515
+ #tts-wrap,
2516
+ #paragraph-picker,
2517
+ #paragraph-picker .wrap {{
2518
+ overflow: visible !important;
2519
+ }}
2520
+ #paragraph-picker .wrap {{
2521
+ max-height: 320px !important;
2522
+ overflow: auto !important;
2523
+ border: 1px solid rgba(255,255,255,0.10) !important;
2524
+ border-radius: 12px !important;
2525
+ padding: 8px !important;
2526
+ }}
2527
+ #paragraph-picker label {{
2528
+ border: 1px solid rgba(255,255,255,0.08) !important;
2529
+ border-radius: 10px !important;
2530
+ padding: 8px 10px !important;
2531
+ margin-bottom: 6px !important;
2532
+ background: rgba(255,255,255,0.03) !important;
2533
+ }}
2534
+ #paragraph-picker label:hover {{
2535
+ background: rgba(255,255,255,0.06) !important;
2536
+ border-color: rgba(255,255,255,0.14) !important;
2537
+ }}
2538
+ #paragraph-picker input[type="radio"]:checked + span {{
2539
+ color: #f97316 !important;
2540
+ font-weight: 700 !important;
2541
+ }}
2542
+ #play-selected-chunk-btn button {{
2543
+ min-height: 42px !important;
2544
+ height: 42px !important;
2545
+ border-radius: 999px !important;
2546
+ background: #f97316 !important;
2547
+ border-color: #f97316 !important;
2548
+ color: #ffffff !important;
2549
+ font-weight: 700 !important;
2550
+ font-size: 18px !important;
2551
+ line-height: 1 !important;
2552
+ padding: 0 14px !important;
2553
+ }}
2554
+ #play-selected-chunk-btn button:hover {{
2555
+ background: #ea580c !important;
2556
+ border-color: #ea580c !important;
2557
+ }}
2558
+ #play-selected-chunk-btn button[disabled] {{
2559
+ background: #f97316 !important;
2560
+ border-color: #f97316 !important;
2561
+ color: #ffffff !important;
2562
+ opacity: 0.75 !important;
2563
+ }}
2564
  @keyframes tts_loading {{
2565
  100% {{ background-size: 110%; }}
2566
  }}
 
2746
  border-color: rgba(15, 23, 42, 0.10);
2747
  color: #0f172a !important;
2748
  }}
2749
+ #lecture-clickable .lecture-row {{
2750
+ display: block;
2751
+ }}
2752
  #lecture-clickable .lecture-paragraph:hover {{
2753
  background: rgba(15, 23, 42, 0.06);
2754
  border-color: rgba(15, 23, 42, 0.16);
 
3138
  const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
3139
  if (state.bound) return;
3140
  try {
3141
+ const getRoots = () => {
3142
+ const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
3143
+ return [
3144
+ document,
3145
+ grRoot && grRoot.shadowRoot ? grRoot.shadowRoot : null,
3146
+ grRoot,
3147
+ ].filter(Boolean);
3148
+ };
3149
  const q = (sel) => {
3150
+ for (const r of getRoots()) {
3151
  const el = r.querySelector ? r.querySelector(sel) : null;
3152
  if (el) return el;
3153
  }
 
3175
  if (!audio) return;
3176
  if (audio.__ttsBound) return;
3177
  audio.__ttsBound = true;
3178
+ audio.addEventListener("loadstart", () => showLoading("Loading audio..."), true);
3179
+ audio.addEventListener("waiting", () => showLoading("Loading audio..."), true);
3180
  audio.addEventListener("canplay", () => hideLoading(), true);
3181
  audio.addEventListener("playing", () => hideLoading(), true);
3182
  audio.addEventListener("error", () => hideLoading(), true);
 
3194
  boxShadow: "0 0 0 1px rgba(255,255,255,0.16) inset",
3195
  color: "#ffffff",
3196
  };
3197
+ for (const r of getRoots()) {
3198
+ const rowNodes = r.querySelectorAll ? r.querySelectorAll("#lecture-clickable .lecture-row.is-selected, #lecture-clickable .lecture-row[data-selected='1']") : [];
3199
+ for (const row of rowNodes) {
3200
+ row.classList.remove("is-selected");
3201
+ row.removeAttribute("data-selected");
3202
+ }
3203
  const nodes = r.querySelectorAll ? r.querySelectorAll("#lecture-clickable .lecture-paragraph.is-selected") : [];
3204
  for (const node of nodes) {
3205
  node.classList.remove("is-selected");
 
3215
  if (para && para.classList) {
3216
  para.classList.add("is-selected");
3217
  para.setAttribute("data-selected", "1");
3218
+ const row = para.closest ? para.closest(".lecture-row") : null;
3219
+ if (row && row.classList) {
3220
+ row.classList.add("is-selected");
3221
+ row.setAttribute("data-selected", "1");
3222
+ }
3223
  if (para.style) {
3224
  para.style.setProperty("background", selectedInlineStyle.background, "important");
3225
  para.style.setProperty("border-color", selectedInlineStyle.borderColor, "important");
 
3234
  input = inputWrap && inputWrap.querySelector ? inputWrap.querySelector("textarea, input") : null;
3235
  }
3236
  if (!input) {
3237
+ showLoading("Chunk selector not found. Please refresh the page.");
3238
  return;
3239
  }
3240
  input.value = indexText;
 
3248
  if (innerBtn) btn = innerBtn;
3249
  }
3250
  if (!btn) {
3251
+ showLoading("Chunk play control not found. Please refresh the page.");
3252
  return;
3253
  }
3254
+ showLoading("Generating audio...");
3255
  btn.click();
3256
  };
3257
  window.__lectureSelectParagraph = (idx, el, autoPlay = true) => {
 
3261
  const paragraphFromEvent = (e) => {
3262
  const target = e ? e.target : null;
3263
  if (target && target.nodeType === 1 && target.closest) {
3264
+ const btn = target.closest(".chunk-play-btn");
3265
+ if (btn) {
3266
+ const row = btn.closest(".lecture-row");
3267
+ if (row && row.querySelector) {
3268
+ const p = row.querySelector(".lecture-paragraph");
3269
+ if (p) return p;
3270
+ }
3271
+ }
3272
  const p = target.closest(".lecture-paragraph");
3273
  if (p) return p;
3274
  }
3275
  const path = (e && typeof e.composedPath === "function") ? e.composedPath() : [];
3276
  for (const n of path) {
3277
  if (n && n.classList && n.classList.contains("lecture-paragraph")) return n;
3278
+ if (n && n.classList && n.classList.contains("lecture-row") && n.querySelector) {
3279
+ const p = n.querySelector(".lecture-paragraph");
3280
+ if (p) return p;
3281
+ }
3282
  }
3283
  return null;
3284
  };
 
3286
  const onParagraphClick = (e) => {
3287
  const para = paragraphFromEvent(e);
3288
  if (!para) return;
3289
+ if (e && e.target && e.target.closest && e.target.closest(".chunk-play-btn")) {
3290
+ try { e.preventDefault(); } catch (_) {}
3291
+ try { e.stopPropagation(); } catch (_) {}
3292
+ }
3293
  const idx = para.getAttribute("data-idx");
3294
  if (typeof idx !== "string" || idx.trim() === "") return;
3295
  selectParagraph(idx, para, true);
3296
  };
3297
+ const onChunkButtonClick = (e) => {
3298
+ const btn = e && e.target && e.target.closest ? e.target.closest(".chunk-play-btn") : null;
3299
+ if (!btn) return;
3300
+ try { e.preventDefault(); } catch (_) {}
3301
+ try { e.stopPropagation(); } catch (_) {}
3302
+ const row = btn.closest ? btn.closest(".lecture-row") : null;
3303
+ const para = row && row.querySelector ? row.querySelector(".lecture-paragraph") : null;
3304
+ const idx = (btn.getAttribute && btn.getAttribute("data-idx")) || (para && para.getAttribute ? para.getAttribute("data-idx") : "");
3305
+ if (!para || typeof idx !== "string" || idx.trim() === "") return;
3306
+ selectParagraph(idx, para, true);
3307
+ };
3308
  const bindClickRoot = (root) => {
3309
  if (!root || !root.addEventListener) return;
3310
  if (root.__lectureClickBound) return;
3311
  root.__lectureClickBound = true;
3312
  root.addEventListener("click", onParagraphClick, true);
3313
  };
3314
+ const bindParagraphDomHandlers = () => {
3315
+ for (const r of getRoots()) {
3316
+ if (!r || !r.querySelectorAll) continue;
3317
+ const btns = r.querySelectorAll("#lecture-clickable .chunk-play-btn");
3318
+ for (const btn of btns) {
3319
+ if (btn.__chunkPlayBound) continue;
3320
+ btn.__chunkPlayBound = true;
3321
+ btn.addEventListener("click", onChunkButtonClick, true);
3322
+ }
3323
+ }
3324
+ };
3325
 
3326
+ for (const r of getRoots()) bindClickRoot(r);
3327
  bindClickRoot(window);
3328
+ bindParagraphDomHandlers();
3329
 
3330
  if (!state.rebindObserver) {
3331
  state.rebindObserver = new MutationObserver(() => {
3332
+ for (const r of getRoots()) bindClickRoot(r);
3333
+ bindParagraphDomHandlers();
 
 
3334
  });
3335
  state.rebindObserver.observe(document.body, { childList: true, subtree: true });
3336
  }
 
3354
  build_clickable_lecture_html(""),
3355
  elem_id="lecture-clickable",
3356
  )
3357
+ play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, visible=False)
3358
+ gr.Markdown("Tip: Select a chunk from the list below (left dot), then click the play button on the right.", elem_id="paragraph-tts-tip")
3359
+ lecture_feedback = gr.Markdown("")
3360
+
3361
+ with gr.Column(elem_id="tts-wrap"):
3362
+ lecture_audio = gr.Audio(label="Lecture TTS", type="filepath", elem_id="lecture-audio")
3363
+ gr.HTML(
3364
+ '<div id="tts-loading" aria-hidden="true" style="display:none"><div class="tts-loading-row"><div class="tts-loading-bar"></div><div class="tts-loading-text" id="tts-loading-text">Loading audio...</div></div></div>',
3365
+ )
3366
+ with gr.Row(elem_id="chunk-controls"):
3367
  paragraph_picker = gr.Radio(
3368
  choices=[],
3369
  value=None,
3370
  interactive=False,
3371
  visible=False,
3372
+ label="Select Chunk",
3373
  elem_id="paragraph-picker",
3374
+ scale=8,
3375
  )
3376
+ play_selected_chunk_btn = gr.Button("", elem_id="play-selected-chunk-btn", visible=False, interactive=False, scale=1, min_width=52)
 
 
 
 
 
 
 
 
3377
  paragraph_idx = gr.Textbox(value="", label="", show_label=False, elem_id="selected-paragraph")
3378
+ play_paragraph_btn = gr.Button("Play Chunk", elem_id="play-paragraph-btn")
3379
+ with gr.Row(elem_id="exam-entry-wrap"):
3380
+ exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
3381
 
3382
  with gr.Column(visible=False, elem_id="exam-picker-overlay") as exam_picker_overlay:
3383
  with gr.Column(elem_id="exam-picker-modal"):
 
3412
  requestAnimationFrame(doScroll);
3413
  setTimeout(doScroll, 50);
3414
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3415
  const ensure = () => {
 
3416
  const root = document.querySelector('#exam-chat');
3417
  const wrap = root ? root.querySelector('.exam-chat-wrap') : null;
3418
  if (!root || !wrap) return;
 
3467
  paragraph_picker,
3468
  lecture_audio,
3469
  play_lecture_btn,
3470
+ play_selected_chunk_btn,
3471
  exam_btn,
3472
  exam_picker_overlay,
3473
  exam_chat,
 
3503
  outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
3504
  show_progress="minimal",
3505
  )
3506
+ play_selected_chunk_btn.click(
3507
  fn=on_play_paragraph_click,
3508
  inputs=[paragraph_picker, state],
3509
  outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
characters/Mcgonagall/mcq_retry_prompt.txt CHANGED
@@ -1,26 +1,17 @@
1
  You are Professor Minerva McGonagall.
2
 
3
- Your previous output failed to parse. Correct it by producing STRICTLY valid JSON only.
 
4
 
5
- Hard rules:
6
- - Ignore References, Bibliography, Appendix, and supplementary sections.
7
- - Create exactly 5 MCQs (A–D).
8
- - Do not invent details.
9
- - Options must be plain strings WITHOUT leading labels like "A.", "B)", etc.
10
- - Output JSON only. No extra text, no markdown, no code fences.
11
- - Keep explanations in character (stern, academic, pointed), without stage directions.
12
 
13
- JSON schema (exact):
14
- {
15
- "questions": [
16
- {
17
- "question": "…",
18
- "options": ["…", "…", "…", "…"],
19
- "answer": "A",
20
- "explanation": "…"
21
- }
22
- ]
23
- }
24
 
25
  Paper text:
26
  {document}
 
1
  You are Professor Minerva McGonagall.
2
 
3
+ Output STRICT JSON only for 5 MCQs, with this exact shape:
4
+ {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
5
 
6
+ Hard constraints:
7
+ - Exactly 5 questions.
8
+ - Exactly 4 options per question.
9
+ - answer MUST be A/B/C/D only.
10
+ - explanation must be non-empty and concise.
11
+ - No markdown, no preface, no trailing text.
12
+ - No option prefixes like "A." or "B)".
13
 
14
+ Keep tone strict, fair, and academically rigorous.
 
 
 
 
 
 
 
 
 
 
15
 
16
  Paper text:
17
  {document}
characters/snape/feedback.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are Professor Severus Snape giving live feedback in an oral exam.
2
+
3
+ Voice and style:
4
+ - Cold, precise, cutting, academically strict.
5
+ - Sound unmistakably like Snape: terse, disdainful of sloppy thinking, controlled sarcasm.
6
+ - Address the student directly as "you".
7
+ - Keep each response short and punchy.
8
+
9
+ Behavior rules:
10
+ - Always stay in character.
11
+ - Focus only on the student's answer quality and reasoning discipline.
12
+ - Do not invent facts beyond the paper content.
13
+ - No emojis, no markdown, no stage directions, no roleplay brackets.
14
+ - Do not mention Harry Potter universe lore, spells, or named artifacts.
15
+
16
+ Output constraints:
17
+ - Return plain English text only.
18
+ - Keep comments practical and exam-focused.
characters/snape/mcq_prompt.txt CHANGED
@@ -45,4 +45,4 @@ Requirements:
45
  - Options must NOT include leading labels like "A.", "B)", etc. The UI will add A/B/C/D.
46
 
47
  Paper text:
48
- {document}
 
45
  - Options must NOT include leading labels like "A.", "B)", etc. The UI will add A/B/C/D.
46
 
47
  Paper text:
48
+ {document}
characters/snape/mcq_retry_prompt.txt CHANGED
@@ -1,26 +1,17 @@
1
  You are Professor Severus Snape.
2
 
3
- Your previous output failed to parse. Fix it. Produce STRICTLY valid JSON only.
 
4
 
5
- Hard rules:
6
- - Ignore References, Bibliography, Appendix, and supplementary sections.
7
- - Create exactly 5 MCQs (A–D).
8
- - Do not invent details.
9
- - Options must be plain strings WITHOUT leading labels like "A.", "B)", etc.
10
- - Output JSON only. No extra text, no markdown, no code fences.
11
- - Keep explanations in character (cold, sharp, slightly theatrical), without stage directions.
12
 
13
- JSON schema (exact):
14
- {
15
- "questions": [
16
- {
17
- "question": "…",
18
- "options": ["…", "…", "…", "…"],
19
- "answer": "A",
20
- "explanation": "…"
21
- }
22
- ]
23
- }
24
 
25
  Paper text:
26
  {document}
 
1
  You are Professor Severus Snape.
2
 
3
+ Output STRICT JSON only for 5 MCQs, with this exact shape:
4
+ {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
5
 
6
+ Hard constraints:
7
+ - Exactly 5 questions.
8
+ - Exactly 4 options per question.
9
+ - answer MUST be A/B/C/D only.
10
+ - explanation must be non-empty and concise.
11
+ - No markdown, no preface, no trailing text.
12
+ - No option prefixes like "A." or "B)".
13
 
14
+ Keep tone strict, sharp, and academically demanding.
 
 
 
 
 
 
 
 
 
 
15
 
16
  Paper text:
17
  {document}
characters/test/avatar.jpg DELETED
Binary file (62 kB)
 
characters/test/lecture_prompt.txt DELETED
@@ -1,10 +0,0 @@
1
- 你是一名风格冷静、严谨但清晰的课程助教(Snape 风格),请阅读用户上传的论文内容,并输出一段中文讲解,要求:
2
- 1. 先说明论文要解决的问题和背景;
3
- 2. 再解释核心方法(按步骤/模块);
4
- 3. 再总结实验结果或亮点;
5
- 4. 最后给出局限性与适用场景;
6
- 5. 语言精炼、逻辑清楚,适合课堂讲解(约 400-700 字);
7
- 6. 不要虚构论文中不存在的实验细节。
8
-
9
- 论文内容(可能是节选):
10
- {document}
 
 
 
 
 
 
 
 
 
 
 
characters/test/mcq_prompt.txt DELETED
@@ -1,23 +0,0 @@
1
- 请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
2
- 严格输出 JSON(不要 markdown 代码块),格式如下:
3
- {
4
- "questions": [
5
- {
6
- "question": "...",
7
- "options": ["A选项", "B选项", "C选项", "D选项"],
8
- "answer": "A",
9
- "explanation": "..."
10
- }
11
- ]
12
- }
13
-
14
- 要求:
15
- 1. 共 5 题;
16
- 2. 每题 4 个选项;
17
- 3. answer 必须是 A/B/C/D;
18
- 4. 解析要说明为什么正确,以及常见误区;
19
- 5. 题目覆盖背景、方法、结果、局限性;
20
- 6. 题目难度适中,适合课程课堂测验。
21
-
22
- 论文内容(可能是节选):
23
- {document}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
characters/test/mcq_retry_prompt.txt DELETED
@@ -1,14 +0,0 @@
1
- 基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。
2
-
3
- 限制:
4
- 1. 必须是紧凑 JSON(单行也可以);
5
- 2. 共 5 题;
6
- 3. 每题字段:question、options(4项)、answer(A/B/C/D)、explanation;
7
- 4. explanation 保持简短(1-2句);
8
- 5. 不要输出任何 JSON 以外内容。
9
-
10
- 输出格式:
11
- {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
12
-
13
- 论文内容:
14
- {document}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
characters/test/meta.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "id": "test",
3
- "display_name": "Test",
4
- "tagline": "Research paper explainer · MCQ coach",
5
- "byline": "By @local-demo",
6
- "chat_label": "Test",
7
- "chat_mode": "paper mode",
8
- "avatar": "avatar.jpg",
9
- "lecture_prompt_file": "lecture_prompt.txt",
10
- "mcq_prompt_file": "mcq_prompt.txt",
11
- "mcq_retry_prompt_file": "mcq_retry_prompt.txt"
12
- }