XXiao committed on
Commit
df2999f
·
1 Parent(s): 6c66831
Files changed (2) hide show
  1. README.md +2 -0
  2. app.py +381 -72
README.md CHANGED
@@ -84,6 +84,7 @@ git push hf main
84
  API_UR="https://dashscope.aliyuncs.com/compatible-mode/v1"
85
  API_KEY="你的Key"
86
  USE_MOCK_MODELS=0
 
87
 
88
  # TTS 走 HF Space(优先)
89
  HF_TTS_SPACE_ID="your-org/audio"
@@ -100,6 +101,7 @@ HF_TTS_ALLOW_FALLBACK=1
100
 
101
  - 如果你更希望用完整 URL,可以改为 `HF_TTS_SPACE_URL="https://your-org-audio.hf.space"`。
102
  - 如果不想回退到原 TTS 接口,设置 `HF_TTS_ALLOW_FALLBACK=0`。
 
103
 
104
  ## 角色目录结构(自动发现)
105
 
 
84
  API_UR="https://dashscope.aliyuncs.com/compatible-mode/v1"
85
  API_KEY="你的Key"
86
  USE_MOCK_MODELS=0
87
+ USE_MOCK_TTS=0
88
 
89
  # TTS 走 HF Space(优先)
90
  HF_TTS_SPACE_ID="your-org/audio"
 
101
 
102
  - 如果你更希望用完整 URL,可以改为 `HF_TTS_SPACE_URL="https://your-org-audio.hf.space"`。
103
  - 如果不想回退到原 TTS 接口,设置 `HF_TTS_ALLOW_FALLBACK=0`。
104
+ - 如果只想 mock 文本/题目,但 TTS 用真实接口:`USE_MOCK_MODELS=1` 且 `USE_MOCK_TTS=0`。
105
 
106
  ## 角色目录结构(自动发现)
107
 
app.py CHANGED
@@ -63,6 +63,7 @@ _load_dotenv_file(APP_DIR / ".env")
63
  API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
64
  API_KEY = os.getenv("API_KEY", "")
65
  USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
 
66
  CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
67
  TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
68
  TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
@@ -258,6 +259,7 @@ def new_session_state() -> Dict[str, Any]:
258
  return {
259
  "lecture_text": "",
260
  "lecture_audio_path": None,
 
261
  "explanation_audio_path": None,
262
  "last_explanation_tts_text": "",
263
  "pdf_path": None,
@@ -416,10 +418,10 @@ def _is_hf_tts_enabled() -> bool:
416
 
417
 
418
  def _tts_backend_name() -> str:
 
 
419
  if _is_hf_tts_enabled():
420
  return f"hf_space:{HF_TTS_SPACE_ID or HF_TTS_SPACE_URL}"
421
- if USE_MOCK_MODELS:
422
- return "mock_tts"
423
  return "api_tts"
424
 
425
 
@@ -492,6 +494,34 @@ def split_text_for_tts(text: str, max_len: int = 480) -> List[str]:
492
  return chunks
493
 
494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  def concat_wav_files(wav_paths: List[str], out_path: str) -> str:
496
  if not wav_paths:
497
  raise RuntimeError("No WAV chunks to concatenate.")
@@ -525,7 +555,8 @@ class QwenPipelineEngine:
525
  PDF -> lecture text -> MCQs -> TTS audio
526
 
527
  This ships with a mock mode by default so the workflow is runnable immediately.
528
- When USE_MOCK_MODELS=0, it calls remote APIs:
 
529
  - VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
530
  - TTS: HF Space /tts_chunk (optional) or DashScope/OpenAI-compatible endpoints
531
  """
@@ -551,11 +582,11 @@ class QwenPipelineEngine:
551
  def ensure_tts_loaded(self) -> None:
552
  if self.tts_loaded:
553
  return
554
- if _is_hf_tts_enabled():
555
- self._ensure_hf_tts_client()
556
  self.tts_loaded = True
557
  return
558
- if self.mock_mode:
 
559
  self.tts_loaded = True
560
  return
561
  _require_api_url()
@@ -586,33 +617,62 @@ class QwenPipelineEngine:
586
  return self._hf_tts_client
587
 
588
  def _hf_space_tts_single(self, text: str, out_path: str, *, voice: str, language: str) -> str:
589
- client = self._ensure_hf_tts_client()
590
  configured = (HF_TTS_API_NAME or "").strip()
591
  normalized = configured.lstrip("/")
592
- api_candidates: List[str] = []
593
- for cand in [configured, f"/{normalized}" if normalized else "", normalized, "/tts_chunk", "tts_chunk", "/predict", "predict"]:
594
- cand = cand.strip()
595
- if cand and cand not in api_candidates:
596
- api_candidates.append(cand)
597
 
598
  result: Any = None
599
  last_exc: Optional[Exception] = None
600
- for api_name in api_candidates:
601
- try:
602
- result = client.predict(
603
- text=text,
604
- voice=voice,
605
- language=language,
606
- api_name=api_name,
607
- )
608
- last_exc = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  break
610
- except Exception as exc:
611
- msg = str(exc)
612
- if "Cannot find a function with api_name" in msg:
613
- last_exc = exc
614
- continue
615
- raise
616
  if last_exc is not None:
617
  available_hint = ""
618
  view_api = getattr(client, "view_api", None)
@@ -757,7 +817,7 @@ class QwenPipelineEngine:
757
  except Exception as exc:
758
  if not HF_TTS_ALLOW_FALLBACK:
759
  raise RuntimeError(f"HF Space TTS failed and fallback is disabled: {type(exc).__name__}: {exc}")
760
- if self.mock_mode:
761
  return write_tone_wav(text, out_path)
762
 
763
  openai_url = f"{_require_api_url()}/audio/speech"
@@ -808,12 +868,8 @@ class QwenPipelineEngine:
808
  raise RuntimeError(f"Failed to download TTS audio {audio_resp.status_code}: {audio_resp.text[:500]}")
809
  return _save_binary_audio(audio_resp.content, out_path)
810
 
811
- def _real_tts(self, text: str, out_path: str, *, voice: Optional[str] = None) -> str:
812
- if not TEXT_SPLIT_TO_CHUNK:
813
- if not str(text or "").strip():
814
- return write_tone_wav("empty", out_path)
815
- return self._real_tts_single(str(text), out_path, voice=voice)
816
- chunks = split_text_for_tts(text, max_len=480)
817
  if not chunks:
818
  return write_tone_wav("empty", out_path)
819
  if len(chunks) == 1:
@@ -825,6 +881,31 @@ class QwenPipelineEngine:
825
  chunk_paths.append(self._real_tts_single(chunk, chunk_path, voice=voice))
826
  return concat_wav_files(chunk_paths, out_path)
827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
  @spaces.GPU
829
  def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
830
  self.ensure_vl_loaded()
@@ -942,7 +1023,7 @@ class QwenPipelineEngine:
942
  def synthesize_tts(self, text: str, name_prefix: str = "audio", *, voice: Optional[str] = None) -> str:
943
  self.ensure_tts_loaded()
944
  out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
945
- if self.mock_mode and not _is_hf_tts_enabled():
946
  return write_tone_wav(text, out_path)
947
  return self._real_tts(text, out_path, voice=voice)
948
 
@@ -1416,6 +1497,7 @@ def reset_ui_from_state(
1416
  submit_interactive = quiz_ready and not state.get("completed", False)
1417
  radio_interactive = submit_interactive
1418
  lecture_tts_ready = bool(state.get("lecture_text"))
 
1419
  if state.get("completed"):
1420
  radio_interactive = False
1421
  return (
@@ -1428,7 +1510,13 @@ def reset_ui_from_state(
1428
  gr.update(visible=show_explain_page),
1429
  gr.update(visible=show_exam_page),
1430
  state.get("status", "Idle"),
1431
- build_clickable_lecture_html(state.get("lecture_text", "")),
 
 
 
 
 
 
1432
  state.get("lecture_audio_path", None),
1433
  gr.update(interactive=lecture_tts_ready),
1434
  gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
@@ -1465,6 +1553,7 @@ def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any
1465
 
1466
  state["lecture_text"] = lecture_text
1467
  state["lecture_audio_path"] = None
 
1468
  state["explanation_audio_path"] = None
1469
  state["last_explanation_tts_text"] = ""
1470
  state["pdf_path"] = pdf_file
@@ -1712,6 +1801,7 @@ def on_character_change(character_id: str, state: Dict[str, Any]):
1712
  state["character_id"] = cfg["id"]
1713
  state["current_page"] = "explain"
1714
  state["lecture_audio_path"] = None
 
1715
  state["explanation_audio_path"] = None
1716
  state["last_explanation_tts_text"] = ""
1717
  # Keep generated content if user wants to compare, but hide result pages until next generate.
@@ -1739,14 +1829,26 @@ def tts_voice_for_character(character_id: Optional[str]) -> str:
1739
  def play_lecture_audio(state: Dict[str, Any]):
1740
  if not state.get("lecture_text"):
1741
  state["status"] = "No lecture text available."
1742
- return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
 
 
 
 
 
 
1743
  backend = _tts_backend_name()
1744
  voice = tts_voice_for_character(state.get("character_id"))
1745
  try:
1746
  state["status"] = f"Generating lecture audio ({backend})..."
1747
  state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
1748
  state["status"] = "Lecture audio ready."
1749
- return state, state["status"], state["lecture_audio_path"], f"Lecture audio generated via `{backend}`."
 
 
 
 
 
 
1750
  except Exception as exc:
1751
  state["status"] = "Lecture audio generation failed."
1752
  return (
@@ -1754,6 +1856,7 @@ def play_lecture_audio(state: Dict[str, Any]):
1754
  state["status"],
1755
  state.get("lecture_audio_path"),
1756
  f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
 
1757
  )
1758
 
1759
 
@@ -1765,14 +1868,39 @@ def split_lecture_paragraphs(text: str) -> List[str]:
1765
  return [p.strip() for p in pieces if p and p.strip()]
1766
 
1767
 
1768
- def build_clickable_lecture_html(lecture_text: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
1769
  paragraphs = split_lecture_paragraphs(lecture_text)
1770
  if not paragraphs:
1771
  return '<div class="lecture-empty">Generated lecture explanation will appear here...</div>'
 
1772
  parts: List[str] = ['<div class="lecture-clickable">']
1773
  for i, p in enumerate(paragraphs):
1774
  safe = html.escape(p, quote=False).replace("\n", "<br>")
1775
- parts.append(f'<div class="lecture-paragraph" data-idx="{i}">{safe}</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
1776
  parts.append("</div>")
1777
  return "".join(parts)
1778
 
@@ -1782,7 +1910,13 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1782
  paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
1783
  if not paragraphs:
1784
  state["status"] = "暂无讲解内容。"
1785
- return state, state.get("status", "Idle"), state.get("lecture_audio_path"), "请先生成讲解。"
 
 
 
 
 
 
1786
 
1787
  try:
1788
  idx = int(str(paragraph_idx or "").strip())
@@ -1790,11 +1924,18 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1790
  idx = -1
1791
  if idx < 0 or idx >= len(paragraphs):
1792
  state["status"] = "段落选择无效。"
1793
- return state, state.get("status", "Idle"), state.get("lecture_audio_path"), "请重新点击要播放的段落。"
 
 
 
 
 
 
1794
 
1795
  backend = _tts_backend_name()
1796
  voice = tts_voice_for_character(state.get("character_id"))
1797
  try:
 
1798
  state["status"] = f"正在生成段落语音({backend})..."
1799
  audio_path = engine.synthesize_tts(
1800
  paragraphs[idx],
@@ -1803,10 +1944,23 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
1803
  )
1804
  state["lecture_audio_path"] = audio_path
1805
  state["status"] = "段落语音已生成。"
1806
- return state, state["status"], audio_path, f"已生成第 {idx+1}/{len(paragraphs)} 段语音,可在下方播放。"
 
 
 
 
 
 
 
1807
  except Exception as exc:
1808
  state["status"] = "段落语音生成失败。"
1809
- return state, state["status"], state.get("lecture_audio_path"), f"TTS error via `{backend}`: {type(exc).__name__}: {exc}"
 
 
 
 
 
 
1810
 
1811
 
1812
  def play_explanation_audio(state: Dict[str, Any]):
@@ -1825,6 +1979,32 @@ def play_explanation_audio(state: Dict[str, Any]):
1825
  return state, state["status"], state.get("explanation_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
1826
 
1827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1828
  def build_css() -> str:
1829
  bg_css = ""
1830
 
@@ -2130,8 +2310,16 @@ body {{
2130
  line-height: 1.45 !important;
2131
  color: rgba(244,246,251,0.95) !important;
2132
  }}
 
 
 
 
 
 
 
2133
  #lecture-clickable .lecture-paragraph {{
2134
  cursor: pointer;
 
2135
  padding: 10px 12px;
2136
  border-radius: 14px;
2137
  margin: 0 0 10px 0;
@@ -2145,6 +2333,18 @@ body {{
2145
  background: rgba(255,255,255,0.08);
2146
  border-color: rgba(255,255,255,0.14);
2147
  }}
 
 
 
 
 
 
 
 
 
 
 
 
2148
  .lecture-empty {{
2149
  padding: 10px 12px;
2150
  color: rgba(244,246,251,0.72);
@@ -2367,6 +2567,18 @@ body {{
2367
  background: rgba(15, 23, 42, 0.06);
2368
  border-color: rgba(15, 23, 42, 0.16);
2369
  }}
 
 
 
 
 
 
 
 
 
 
 
 
2370
  .lecture-empty {{
2371
  color: rgba(15, 23, 42, 0.72);
2372
  }}
@@ -2739,7 +2951,7 @@ with gr.Blocks(css=CSS) as demo:
2739
  () => {
2740
  const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
2741
  if (state.bound) return;
2742
- state.bound = true;
2743
  const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
2744
  const rootCandidates = [
2745
  document,
@@ -2786,30 +2998,112 @@ with gr.Blocks(css=CSS) as demo:
2786
  state.observer = new MutationObserver(() => bindAudioLoading());
2787
  state.observer.observe(document.body, { childList: true, subtree: true, attributes: true });
2788
  }
2789
- document.addEventListener(
2790
- "click",
2791
- (e) => {
2792
- const path = (e && typeof e.composedPath === "function") ? e.composedPath() : [];
2793
- let para = null;
2794
- for (const n of path) {
2795
- if (n && n.classList && n.classList.contains("lecture-paragraph")) { para = n; break; }
 
 
 
 
 
 
 
 
 
 
 
 
2796
  }
2797
- if (!para) return;
2798
- const idx = para.getAttribute("data-idx");
2799
- const input = q("#selected-paragraph textarea, #selected-paragraph input");
2800
- const btn = q("#play-paragraph-btn button");
2801
- if (!input || !btn) {
2802
- showLoading("未找到段落播放控件,请刷新页面重试。");
2803
- return;
 
 
2804
  }
2805
- input.value = idx || "";
2806
- input.dispatchEvent(new Event("input", { bubbles: true }));
2807
- input.dispatchEvent(new Event("change", { bubbles: true }));
2808
- showLoading("正在生成语音...");
2809
- btn.click();
2810
- },
2811
- true
2812
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2813
  }
2814
  """,
2815
  )
@@ -2827,7 +3121,15 @@ with gr.Blocks(css=CSS) as demo:
2827
  )
2828
  with gr.Row(elem_id="lecture-actions"):
2829
  play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
2830
- gr.Markdown("提示:点击任意段落可生成该段语音并在下方播放。", elem_id="paragraph-tts-tip")
 
 
 
 
 
 
 
 
2831
  lecture_feedback = gr.Markdown("")
2832
  with gr.Row(elem_id="exam-entry-wrap"):
2833
  exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
@@ -2943,6 +3245,7 @@ with gr.Blocks(css=CSS) as demo:
2943
  exam_page,
2944
  status_box,
2945
  lecture_box,
 
2946
  lecture_audio,
2947
  play_lecture_btn,
2948
  exam_btn,
@@ -2969,15 +3272,21 @@ with gr.Blocks(css=CSS) as demo:
2969
  submit_btn.click(fn=submit_answer, inputs=[choice_radio, state], outputs=outputs, show_progress="hidden")
2970
  restart_btn.click(fn=restart_quiz, inputs=[state], outputs=outputs, show_progress="hidden")
2971
  play_lecture_btn.click(
2972
- fn=play_lecture_audio,
2973
  inputs=[state],
2974
- outputs=[state, status_box, lecture_audio, lecture_feedback],
2975
  show_progress="minimal",
2976
  )
2977
  play_paragraph_btn.click(
2978
- fn=play_lecture_paragraph_audio,
2979
  inputs=[paragraph_idx, state],
2980
- outputs=[state, status_box, lecture_audio, lecture_feedback],
 
 
 
 
 
 
2981
  show_progress="minimal",
2982
  )
2983
 
 
63
  API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
64
  API_KEY = os.getenv("API_KEY", "")
65
  USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
66
+ USE_MOCK_TTS = os.getenv("USE_MOCK_TTS", "0") == "1"
67
  CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
68
  TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
69
  TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
 
259
  return {
260
  "lecture_text": "",
261
  "lecture_audio_path": None,
262
+ "selected_paragraph_idx": "",
263
  "explanation_audio_path": None,
264
  "last_explanation_tts_text": "",
265
  "pdf_path": None,
 
418
 
419
 
420
def _tts_backend_name() -> str:
    """Return a short identifier for the TTS backend that will be used.

    Priority order: explicit mock flag, then HF Space, then the generic
    OpenAI-compatible API.
    """
    if USE_MOCK_TTS:
        # The mock flag wins so TTS can be mocked independently of the
        # text-generation mock (USE_MOCK_MODELS).
        return "mock_tts"
    if not _is_hf_tts_enabled():
        return "api_tts"
    target = HF_TTS_SPACE_ID or HF_TTS_SPACE_URL
    return f"hf_space:{target}"
426
 
427
 
 
494
  return chunks
495
 
496
 
497
def split_text_every_two_sentences(text: str, max_len: int = 480) -> List[str]:
    """Split *text* into TTS-sized chunks of roughly two sentences each.

    Whitespace is normalized first, and inputs at or under ``max_len`` are
    returned as a single chunk.  Sentence boundaries are CJK/ASCII terminal
    punctuation.  A two-sentence group that still exceeds ``max_len`` falls
    back to the generic length-based splitter.
    """
    cleaned = re.sub(r"\s+", " ", (text or "")).strip()
    if not cleaned:
        return []
    if len(cleaned) <= max_len:
        return [cleaned]

    sentences = [s.strip() for s in re.split(r"(?<=[。!?!?;;::\.])\s*", cleaned) if s and s.strip()]
    if not sentences:
        # No recognizable sentence boundaries: defer to the generic splitter.
        return split_text_for_tts(cleaned, max_len=max_len)

    chunks: List[str] = []
    # Walk the sentences two at a time; a trailing odd sentence forms its
    # own group.
    for start in range(0, len(sentences), 2):
        group = " ".join(sentences[start:start + 2]).strip()
        if not group:
            continue
        if len(group) <= max_len:
            chunks.append(group)
        else:
            chunks.extend(split_text_for_tts(group, max_len=max_len))
    return [c for c in chunks if c and c.strip()]
523
+
524
+
525
  def concat_wav_files(wav_paths: List[str], out_path: str) -> str:
526
  if not wav_paths:
527
  raise RuntimeError("No WAV chunks to concatenate.")
 
555
  PDF -> lecture text -> MCQs -> TTS audio
556
 
557
  This ships with a mock mode by default so the workflow is runnable immediately.
558
+ When USE_MOCK_MODELS=0, it calls remote APIs for text generation.
559
+ TTS mock is controlled separately by USE_MOCK_TTS.
560
  - VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
561
  - TTS: HF Space /tts_chunk (optional) or DashScope/OpenAI-compatible endpoints
562
  """
 
582
  def ensure_tts_loaded(self) -> None:
583
  if self.tts_loaded:
584
  return
585
+ if USE_MOCK_TTS:
 
586
  self.tts_loaded = True
587
  return
588
+ if _is_hf_tts_enabled():
589
+ self._ensure_hf_tts_client()
590
  self.tts_loaded = True
591
  return
592
  _require_api_url()
 
617
  return self._hf_tts_client
618
 
619
  def _hf_space_tts_single(self, text: str, out_path: str, *, voice: str, language: str) -> str:
 
620
  configured = (HF_TTS_API_NAME or "").strip()
621
  normalized = configured.lstrip("/")
 
 
 
 
 
622
 
623
  result: Any = None
624
  last_exc: Optional[Exception] = None
625
+ api_candidates: List[str] = []
626
+ for attempt in range(2):
627
+ client = self._ensure_hf_tts_client()
628
+ api_prefix = ""
629
+ cfg = getattr(client, "config", None)
630
+ if isinstance(cfg, dict):
631
+ api_prefix = str(cfg.get("api_prefix") or "").strip()
632
+
633
+ api_candidates = []
634
+ prefixed = f"{api_prefix.rstrip('/')}/{normalized}" if api_prefix and normalized else ""
635
+ for cand in [
636
+ configured,
637
+ f"/{normalized}" if normalized else "",
638
+ normalized,
639
+ prefixed,
640
+ "/gradio_api/tts_chunk",
641
+ "gradio_api/tts_chunk",
642
+ "/tts_chunk",
643
+ "tts_chunk",
644
+ "/predict",
645
+ "predict",
646
+ ]:
647
+ cand = cand.strip()
648
+ if cand and cand not in api_candidates:
649
+ api_candidates.append(cand)
650
+
651
+ result = None
652
+ last_exc = None
653
+ for api_name in api_candidates:
654
+ try:
655
+ result = client.predict(
656
+ text=text,
657
+ voice=voice,
658
+ language=language,
659
+ api_name=api_name,
660
+ )
661
+ last_exc = None
662
+ break
663
+ except Exception as exc:
664
+ msg = str(exc)
665
+ lower_msg = msg.lower()
666
+ if ("cannot find a function" in lower_msg) and ("api_name" in lower_msg):
667
+ last_exc = exc
668
+ continue
669
+ raise
670
+ if last_exc is None:
671
  break
672
+ # Refresh cached client once in case the upstream app reloaded and endpoints changed.
673
+ if attempt == 0:
674
+ self._hf_tts_client = None
675
+
 
 
676
  if last_exc is not None:
677
  available_hint = ""
678
  view_api = getattr(client, "view_api", None)
 
817
  except Exception as exc:
818
  if not HF_TTS_ALLOW_FALLBACK:
819
  raise RuntimeError(f"HF Space TTS failed and fallback is disabled: {type(exc).__name__}: {exc}")
820
+ if USE_MOCK_TTS:
821
  return write_tone_wav(text, out_path)
822
 
823
  openai_url = f"{_require_api_url()}/audio/speech"
 
868
  raise RuntimeError(f"Failed to download TTS audio {audio_resp.status_code}: {audio_resp.text[:500]}")
869
  return _save_binary_audio(audio_resp.content, out_path)
870
 
871
+ def _synthesize_tts_chunks(self, chunks: List[str], out_path: str, *, voice: Optional[str] = None) -> str:
872
+ chunks = [str(c or "").strip() for c in chunks if str(c or "").strip()]
 
 
 
 
873
  if not chunks:
874
  return write_tone_wav("empty", out_path)
875
  if len(chunks) == 1:
 
881
  chunk_paths.append(self._real_tts_single(chunk, chunk_path, voice=voice))
882
  return concat_wav_files(chunk_paths, out_path)
883
 
884
def _real_tts(self, text: str, out_path: str, *, voice: Optional[str] = None) -> str:
    """Synthesize *text* into a WAV file at *out_path* via the real backend.

    Empty input yields a placeholder tone.  When TEXT_SPLIT_TO_CHUNK is
    enabled the text is always length-split before synthesis; otherwise a
    single-shot synthesis is attempted first, and only "text too long" style
    backend failures trigger a retry with two-sentence chunking.
    """
    cleaned = str(text or "").strip()
    if not cleaned:
        return write_tone_wav("empty", out_path)

    if TEXT_SPLIT_TO_CHUNK:
        return self._synthesize_tts_chunks(split_text_for_tts(cleaned, max_len=480), out_path, voice=voice)

    try:
        return self._real_tts_single(cleaned, out_path, voice=voice)
    except Exception as exc:
        # Retry with chunking only for backend "too long" complaints;
        # anything else is a genuine failure and propagates unchanged.
        err = str(exc).lower()
        if "text too long" not in err and "chunk-level api" not in err:
            raise
        return self._synthesize_tts_chunks(
            split_text_every_two_sentences(cleaned, max_len=480),
            out_path,
            voice=voice,
        )
908
+
909
  @spaces.GPU
910
  def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
911
  self.ensure_vl_loaded()
 
1023
  def synthesize_tts(self, text: str, name_prefix: str = "audio", *, voice: Optional[str] = None) -> str:
1024
  self.ensure_tts_loaded()
1025
  out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
1026
+ if USE_MOCK_TTS:
1027
  return write_tone_wav(text, out_path)
1028
  return self._real_tts(text, out_path, voice=voice)
1029
 
 
1497
  submit_interactive = quiz_ready and not state.get("completed", False)
1498
  radio_interactive = submit_interactive
1499
  lecture_tts_ready = bool(state.get("lecture_text"))
1500
+ selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
1501
  if state.get("completed"):
1502
  radio_interactive = False
1503
  return (
 
1510
  gr.update(visible=show_explain_page),
1511
  gr.update(visible=show_exam_page),
1512
  state.get("status", "Idle"),
1513
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1514
+ gr.update(
1515
+ choices=paragraph_picker_choices(state.get("lecture_text", "")),
1516
+ value=selected_paragraph_value,
1517
+ interactive=lecture_tts_ready,
1518
+ visible=lecture_tts_ready,
1519
+ ),
1520
  state.get("lecture_audio_path", None),
1521
  gr.update(interactive=lecture_tts_ready),
1522
  gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
 
1553
 
1554
  state["lecture_text"] = lecture_text
1555
  state["lecture_audio_path"] = None
1556
+ state["selected_paragraph_idx"] = ""
1557
  state["explanation_audio_path"] = None
1558
  state["last_explanation_tts_text"] = ""
1559
  state["pdf_path"] = pdf_file
 
1801
  state["character_id"] = cfg["id"]
1802
  state["current_page"] = "explain"
1803
  state["lecture_audio_path"] = None
1804
+ state["selected_paragraph_idx"] = ""
1805
  state["explanation_audio_path"] = None
1806
  state["last_explanation_tts_text"] = ""
1807
  # Keep generated content if user wants to compare, but hide result pages until next generate.
 
1829
  def play_lecture_audio(state: Dict[str, Any]):
1830
  if not state.get("lecture_text"):
1831
  state["status"] = "No lecture text available."
1832
+ return (
1833
+ state,
1834
+ state["status"],
1835
+ state.get("lecture_audio_path"),
1836
+ "Generate lecture first.",
1837
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1838
+ )
1839
  backend = _tts_backend_name()
1840
  voice = tts_voice_for_character(state.get("character_id"))
1841
  try:
1842
  state["status"] = f"Generating lecture audio ({backend})..."
1843
  state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
1844
  state["status"] = "Lecture audio ready."
1845
+ return (
1846
+ state,
1847
+ state["status"],
1848
+ state["lecture_audio_path"],
1849
+ f"Lecture audio generated via `{backend}`.",
1850
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1851
+ )
1852
  except Exception as exc:
1853
  state["status"] = "Lecture audio generation failed."
1854
  return (
 
1856
  state["status"],
1857
  state.get("lecture_audio_path"),
1858
  f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
1859
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1860
  )
1861
 
1862
 
 
1868
  return [p.strip() for p in pieces if p and p.strip()]
1869
 
1870
 
1871
def paragraph_picker_choices(lecture_text: str) -> List[tuple[str, str]]:
    """Build (label, value) dropdown choices, one per lecture paragraph.

    Each label is "Chunk N: <preview>" with a whitespace-normalized preview
    capped at ~110 characters; the value is the paragraph index as a string.
    """
    choices: List[tuple[str, str]] = []
    for idx, paragraph in enumerate(split_lecture_paragraphs(lecture_text)):
        preview = re.sub(r"\s+", " ", str(paragraph or "")).strip()
        if len(preview) > 110:
            # Truncate and add an ellipsis so long paragraphs stay readable.
            preview = preview[:107].rstrip() + "..."
        choices.append((f"Chunk {idx + 1}: {preview}", str(idx)))
    return choices
1880
+
1881
+
1882
def build_clickable_lecture_html(lecture_text: str, selected_idx: str = "") -> str:
    """Render the lecture text as clickable paragraph ``<div>`` elements.

    Each paragraph div carries its index in ``data-idx`` plus an onclick hook
    (``window.__lectureSelectParagraph``).  The paragraph matching
    *selected_idx* is highlighted both with the ``is-selected`` class and
    duplicated inline styles (the inline copy survives re-renders that drop
    injected CSS).
    """
    paragraphs = split_lecture_paragraphs(lecture_text)
    if not paragraphs:
        return '<div class="lecture-empty">Generated lecture explanation will appear here...</div>'
    selected = str(selected_idx or "").strip()
    highlight_style = (
        "background: #f97316 !important; "
        "border-color: #f97316 !important; "
        "box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important; "
        "color: #ffffff !important;"
    )
    parts: List[str] = ['<div class="lecture-clickable">']
    for i, p in enumerate(paragraphs):
        safe = html.escape(p, quote=False).replace("\n", "<br>")
        is_selected = bool(selected) and selected == str(i)
        selected_cls = " is-selected" if is_selected else ""
        selected_style = highlight_style if is_selected else ""
        parts.append(
            f'<div class="lecture-paragraph{selected_cls}" data-idx="{i}" '
            f'style="{selected_style}" '
            f'onclick="window.__lectureSelectParagraph && window.__lectureSelectParagraph({i}, this, true);">{safe}</div>'
        )
    parts.append("</div>")
    return "".join(parts)
1906
 
 
1910
  paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
1911
  if not paragraphs:
1912
  state["status"] = "暂无讲解内容。"
1913
+ return (
1914
+ state,
1915
+ state.get("status", "Idle"),
1916
+ state.get("lecture_audio_path"),
1917
+ "请先生成讲解。",
1918
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1919
+ )
1920
 
1921
  try:
1922
  idx = int(str(paragraph_idx or "").strip())
 
1924
  idx = -1
1925
  if idx < 0 or idx >= len(paragraphs):
1926
  state["status"] = "段落选择无效。"
1927
+ return (
1928
+ state,
1929
+ state.get("status", "Idle"),
1930
+ state.get("lecture_audio_path"),
1931
+ "请重新点击要播放的段落。",
1932
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1933
+ )
1934
 
1935
  backend = _tts_backend_name()
1936
  voice = tts_voice_for_character(state.get("character_id"))
1937
  try:
1938
+ state["selected_paragraph_idx"] = str(idx)
1939
  state["status"] = f"正在生成段落语音({backend})..."
1940
  audio_path = engine.synthesize_tts(
1941
  paragraphs[idx],
 
1944
  )
1945
  state["lecture_audio_path"] = audio_path
1946
  state["status"] = "段落语音已生成。"
1947
+ char_len = len(paragraphs[idx])
1948
+ return (
1949
+ state,
1950
+ state["status"],
1951
+ audio_path,
1952
+ f"已生成第 {idx+1}/{len(paragraphs)} 段语音({char_len} 字符),可在下方播放。",
1953
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1954
+ )
1955
  except Exception as exc:
1956
  state["status"] = "段落语音生成失败。"
1957
+ return (
1958
+ state,
1959
+ state["status"],
1960
+ state.get("lecture_audio_path"),
1961
+ f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
1962
+ build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
1963
+ )
1964
 
1965
 
1966
  def play_explanation_audio(state: Dict[str, Any]):
 
1979
  return state, state["status"], state.get("explanation_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
1980
 
1981
 
1982
def on_play_lecture_audio_click(state: Dict[str, Any]):
    """UI handler: generate full-lecture audio, then sync the paragraph picker.

    Wraps play_lecture_audio() and appends a gr.update that keeps the
    paragraph dropdown consistent with state["selected_paragraph_idx"].
    """
    state, status, audio_path, feedback, lecture_html = play_lecture_audio(state)
    # Dropdown expects None (no selection) rather than an empty string.
    picker_value = str(state.get("selected_paragraph_idx", "")).strip() or None
    return (
        state,
        status,
        audio_path,
        feedback,
        lecture_html,
        gr.update(value=picker_value),
    )
1993
+
1994
+
1995
def on_play_paragraph_click(paragraph_idx: str, state: Dict[str, Any]):
    """UI handler: synthesize one paragraph's audio, then sync the picker.

    Wraps play_lecture_paragraph_audio() and appends a gr.update keeping the
    paragraph dropdown consistent with state["selected_paragraph_idx"].
    """
    state, status, audio_path, feedback, lecture_html = play_lecture_paragraph_audio(paragraph_idx, state)
    # Dropdown expects None (no selection) rather than an empty string.
    picker_value = str(state.get("selected_paragraph_idx", "")).strip() or None
    return (
        state,
        status,
        audio_path,
        feedback,
        lecture_html,
        gr.update(value=picker_value),
    )
2006
+
2007
+
2008
  def build_css() -> str:
2009
  bg_css = ""
2010
 
 
2310
  line-height: 1.45 !important;
2311
  color: rgba(244,246,251,0.95) !important;
2312
  }}
2313
+ #lecture-clickable,
2314
+ #lecture-clickable .html-container,
2315
+ #lecture-clickable .html-container *,
2316
+ #lecture-clickable .lecture-clickable,
2317
+ #lecture-clickable .lecture-clickable * {{
2318
+ pointer-events: auto !important;
2319
+ }}
2320
  #lecture-clickable .lecture-paragraph {{
2321
  cursor: pointer;
2322
+ pointer-events: auto !important;
2323
  padding: 10px 12px;
2324
  border-radius: 14px;
2325
  margin: 0 0 10px 0;
 
2333
  background: rgba(255,255,255,0.08);
2334
  border-color: rgba(255,255,255,0.14);
2335
  }}
2336
+ #lecture-clickable .lecture-paragraph.is-selected {{
2337
+ background: #f97316 !important;
2338
+ border-color: #f97316 !important;
2339
+ box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
2340
+ color: #ffffff !important;
2341
+ }}
2342
+ #lecture-clickable .lecture-paragraph[data-selected="1"] {{
2343
+ background: #f97316 !important;
2344
+ border-color: #f97316 !important;
2345
+ box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
2346
+ color: #ffffff !important;
2347
+ }}
2348
  .lecture-empty {{
2349
  padding: 10px 12px;
2350
  color: rgba(244,246,251,0.72);
 
2567
  background: rgba(15, 23, 42, 0.06);
2568
  border-color: rgba(15, 23, 42, 0.16);
2569
  }}
2570
+ #lecture-clickable .lecture-paragraph.is-selected {{
2571
+ background: #f97316 !important;
2572
+ border-color: #f97316 !important;
2573
+ box-shadow: 0 0 0 1px rgba(255,255,255,0.18) inset !important;
2574
+ color: #ffffff !important;
2575
+ }}
2576
+ #lecture-clickable .lecture-paragraph[data-selected="1"] {{
2577
+ background: #f97316 !important;
2578
+ border-color: #f97316 !important;
2579
+ box-shadow: 0 0 0 1px rgba(255,255,255,0.18) inset !important;
2580
+ color: #ffffff !important;
2581
+ }}
2582
  .lecture-empty {{
2583
  color: rgba(15, 23, 42, 0.72);
2584
  }}
 
2951
  () => {
2952
  const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
2953
  if (state.bound) return;
2954
+ try {
2955
  const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
2956
  const rootCandidates = [
2957
  document,
 
2998
  state.observer = new MutationObserver(() => bindAudioLoading());
2999
  state.observer.observe(document.body, { childList: true, subtree: true, attributes: true });
3000
  }
3001
+ const selectParagraph = (idx, para, autoPlay) => {
3002
+ const indexText = String(idx ?? "").trim();
3003
+ const selectedInlineStyle = {
3004
+ background: "#f97316",
3005
+ borderColor: "#f97316",
3006
+ boxShadow: "0 0 0 1px rgba(255,255,255,0.16) inset",
3007
+ color: "#ffffff",
3008
+ };
3009
+ for (const r of rootCandidates) {
3010
+ const nodes = r.querySelectorAll ? r.querySelectorAll("#lecture-clickable .lecture-paragraph.is-selected") : [];
3011
+ for (const node of nodes) {
3012
+ node.classList.remove("is-selected");
3013
+ node.removeAttribute("data-selected");
3014
+ if (node.style) {
3015
+ node.style.removeProperty("background");
3016
+ node.style.removeProperty("border-color");
3017
+ node.style.removeProperty("box-shadow");
3018
+ node.style.removeProperty("color");
3019
+ }
3020
  }
3021
+ }
3022
+ if (para && para.classList) {
3023
+ para.classList.add("is-selected");
3024
+ para.setAttribute("data-selected", "1");
3025
+ if (para.style) {
3026
+ para.style.setProperty("background", selectedInlineStyle.background, "important");
3027
+ para.style.setProperty("border-color", selectedInlineStyle.borderColor, "important");
3028
+ para.style.setProperty("box-shadow", selectedInlineStyle.boxShadow, "important");
3029
+ para.style.setProperty("color", selectedInlineStyle.color, "important");
3030
  }
3031
+ }
3032
+
3033
+ let input = q("#selected-paragraph textarea, #selected-paragraph input");
3034
+ if (!input) {
3035
+ const inputWrap = q("#selected-paragraph");
3036
+ input = inputWrap && inputWrap.querySelector ? inputWrap.querySelector("textarea, input") : null;
3037
+ }
3038
+ if (!input) {
3039
+ showLoading("未找到段落选择控件,请刷新页面重试。");
3040
+ return;
3041
+ }
3042
+ input.value = indexText;
3043
+ input.dispatchEvent(new Event("input", { bubbles: true }));
3044
+ input.dispatchEvent(new Event("change", { bubbles: true }));
3045
+
3046
+ if (!autoPlay) return;
3047
+ let btn = q("#play-paragraph-btn button, #play-paragraph-btn");
3048
+ if (btn && btn.querySelector && btn.tagName !== "BUTTON") {
3049
+ const innerBtn = btn.querySelector("button");
3050
+ if (innerBtn) btn = innerBtn;
3051
+ }
3052
+ if (!btn) {
3053
+ showLoading("未找到段落播放控件,请刷新页面重试。");
3054
+ return;
3055
+ }
3056
+ showLoading("正在生成语音...");
3057
+ btn.click();
3058
+ };
3059
+ window.__lectureSelectParagraph = (idx, el, autoPlay = true) => {
3060
+ selectParagraph(idx, el, autoPlay);
3061
+ };
3062
+
3063
+ const paragraphFromEvent = (e) => {
3064
+ const target = e ? e.target : null;
3065
+ if (target && target.nodeType === 1 && target.closest) {
3066
+ const p = target.closest(".lecture-paragraph");
3067
+ if (p) return p;
3068
+ }
3069
+ const path = (e && typeof e.composedPath === "function") ? e.composedPath() : [];
3070
+ for (const n of path) {
3071
+ if (n && n.classList && n.classList.contains("lecture-paragraph")) return n;
3072
+ }
3073
+ return null;
3074
+ };
3075
+
3076
+ const onParagraphClick = (e) => {
3077
+ const para = paragraphFromEvent(e);
3078
+ if (!para) return;
3079
+ const idx = para.getAttribute("data-idx");
3080
+ if (typeof idx !== "string" || idx.trim() === "") return;
3081
+ selectParagraph(idx, para, true);
3082
+ };
3083
+ const bindClickRoot = (root) => {
3084
+ if (!root || !root.addEventListener) return;
3085
+ if (root.__lectureClickBound) return;
3086
+ root.__lectureClickBound = true;
3087
+ root.addEventListener("click", onParagraphClick, true);
3088
+ };
3089
+
3090
+ for (const r of rootCandidates) bindClickRoot(r);
3091
+ bindClickRoot(window);
3092
+
3093
+ if (!state.rebindObserver) {
3094
+ state.rebindObserver = new MutationObserver(() => {
3095
+ const nextRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
3096
+ for (const r of [document, nextRoot && nextRoot.shadowRoot ? nextRoot.shadowRoot : null, nextRoot]) {
3097
+ bindClickRoot(r);
3098
+ }
3099
+ });
3100
+ state.rebindObserver.observe(document.body, { childList: true, subtree: true });
3101
+ }
3102
+ state.bound = true;
3103
+ } catch (err) {
3104
+ state.bound = false;
3105
+ try { console.error("lecture click bridge failed:", err); } catch (_) {}
3106
+ }
3107
  }
3108
  """,
3109
  )
 
3121
  )
3122
  with gr.Row(elem_id="lecture-actions"):
3123
  play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
3124
+ gr.Markdown("提示:可直接点击段落播放;若浏览器拦截点击,请使用下方 Chunk selector。", elem_id="paragraph-tts-tip")
3125
+ paragraph_picker = gr.Radio(
3126
+ choices=[],
3127
+ value=None,
3128
+ interactive=False,
3129
+ visible=False,
3130
+ label="Chunks (fallback selector)",
3131
+ elem_id="paragraph-picker",
3132
+ )
3133
  lecture_feedback = gr.Markdown("")
3134
  with gr.Row(elem_id="exam-entry-wrap"):
3135
  exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
 
3245
  exam_page,
3246
  status_box,
3247
  lecture_box,
3248
+ paragraph_picker,
3249
  lecture_audio,
3250
  play_lecture_btn,
3251
  exam_btn,
 
3272
  submit_btn.click(fn=submit_answer, inputs=[choice_radio, state], outputs=outputs, show_progress="hidden")
3273
  restart_btn.click(fn=restart_quiz, inputs=[state], outputs=outputs, show_progress="hidden")
3274
  play_lecture_btn.click(
3275
+ fn=on_play_lecture_audio_click,
3276
  inputs=[state],
3277
+ outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
3278
  show_progress="minimal",
3279
  )
3280
  play_paragraph_btn.click(
3281
+ fn=on_play_paragraph_click,
3282
  inputs=[paragraph_idx, state],
3283
+ outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
3284
+ show_progress="minimal",
3285
+ )
3286
+ paragraph_picker.change(
3287
+ fn=on_play_paragraph_click,
3288
+ inputs=[paragraph_picker, state],
3289
+ outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
3290
  show_progress="minimal",
3291
  )
3292