updated
Browse files
README.md
CHANGED
|
@@ -84,6 +84,7 @@ git push hf main
|
|
| 84 |
API_UR="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
| 85 |
API_KEY="你的Key"
|
| 86 |
USE_MOCK_MODELS=0
|
|
|
|
| 87 |
|
| 88 |
# TTS 走 HF Space(优先)
|
| 89 |
HF_TTS_SPACE_ID="your-org/audio"
|
|
@@ -100,6 +101,7 @@ HF_TTS_ALLOW_FALLBACK=1
|
|
| 100 |
|
| 101 |
- 如果你更希望用完整 URL,可以改为 `HF_TTS_SPACE_URL="https://your-org-audio.hf.space"`。
|
| 102 |
- 如果不想回退到原 TTS 接口,设置 `HF_TTS_ALLOW_FALLBACK=0`。
|
|
|
|
| 103 |
|
| 104 |
## 角色目录结构(自动发现)
|
| 105 |
|
|
|
|
| 84 |
API_UR="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
| 85 |
API_KEY="你的Key"
|
| 86 |
USE_MOCK_MODELS=0
|
| 87 |
+
USE_MOCK_TTS=0
|
| 88 |
|
| 89 |
# TTS 走 HF Space(优先)
|
| 90 |
HF_TTS_SPACE_ID="your-org/audio"
|
|
|
|
| 101 |
|
| 102 |
- 如果你更希望用完整 URL,可以改为 `HF_TTS_SPACE_URL="https://your-org-audio.hf.space"`。
|
| 103 |
- 如果不想回退到原 TTS 接口,设置 `HF_TTS_ALLOW_FALLBACK=0`。
|
| 104 |
+
- 如果只想 mock 文本/题目,但 TTS 用真实接口:`USE_MOCK_MODELS=1` 且 `USE_MOCK_TTS=0`。
|
| 105 |
|
| 106 |
## 角色目录结构(自动发现)
|
| 107 |
|
app.py
CHANGED
|
@@ -63,6 +63,7 @@ _load_dotenv_file(APP_DIR / ".env")
|
|
| 63 |
API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
|
| 64 |
API_KEY = os.getenv("API_KEY", "")
|
| 65 |
USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
|
|
|
|
| 66 |
CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
|
| 67 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 68 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
|
@@ -258,6 +259,7 @@ def new_session_state() -> Dict[str, Any]:
|
|
| 258 |
return {
|
| 259 |
"lecture_text": "",
|
| 260 |
"lecture_audio_path": None,
|
|
|
|
| 261 |
"explanation_audio_path": None,
|
| 262 |
"last_explanation_tts_text": "",
|
| 263 |
"pdf_path": None,
|
|
@@ -416,10 +418,10 @@ def _is_hf_tts_enabled() -> bool:
|
|
| 416 |
|
| 417 |
|
| 418 |
def _tts_backend_name() -> str:
|
|
|
|
|
|
|
| 419 |
if _is_hf_tts_enabled():
|
| 420 |
return f"hf_space:{HF_TTS_SPACE_ID or HF_TTS_SPACE_URL}"
|
| 421 |
-
if USE_MOCK_MODELS:
|
| 422 |
-
return "mock_tts"
|
| 423 |
return "api_tts"
|
| 424 |
|
| 425 |
|
|
@@ -492,6 +494,34 @@ def split_text_for_tts(text: str, max_len: int = 480) -> List[str]:
|
|
| 492 |
return chunks
|
| 493 |
|
| 494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
def concat_wav_files(wav_paths: List[str], out_path: str) -> str:
|
| 496 |
if not wav_paths:
|
| 497 |
raise RuntimeError("No WAV chunks to concatenate.")
|
|
@@ -525,7 +555,8 @@ class QwenPipelineEngine:
|
|
| 525 |
PDF -> lecture text -> MCQs -> TTS audio
|
| 526 |
|
| 527 |
This ships with a mock mode by default so the workflow is runnable immediately.
|
| 528 |
-
When USE_MOCK_MODELS=0, it calls remote APIs
|
|
|
|
| 529 |
- VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
|
| 530 |
- TTS: HF Space /tts_chunk (optional) or DashScope/OpenAI-compatible endpoints
|
| 531 |
"""
|
|
@@ -551,11 +582,11 @@ class QwenPipelineEngine:
|
|
| 551 |
def ensure_tts_loaded(self) -> None:
|
| 552 |
if self.tts_loaded:
|
| 553 |
return
|
| 554 |
-
if
|
| 555 |
-
self._ensure_hf_tts_client()
|
| 556 |
self.tts_loaded = True
|
| 557 |
return
|
| 558 |
-
if
|
|
|
|
| 559 |
self.tts_loaded = True
|
| 560 |
return
|
| 561 |
_require_api_url()
|
|
@@ -586,33 +617,62 @@ class QwenPipelineEngine:
|
|
| 586 |
return self._hf_tts_client
|
| 587 |
|
| 588 |
def _hf_space_tts_single(self, text: str, out_path: str, *, voice: str, language: str) -> str:
|
| 589 |
-
client = self._ensure_hf_tts_client()
|
| 590 |
configured = (HF_TTS_API_NAME or "").strip()
|
| 591 |
normalized = configured.lstrip("/")
|
| 592 |
-
api_candidates: List[str] = []
|
| 593 |
-
for cand in [configured, f"/{normalized}" if normalized else "", normalized, "/tts_chunk", "tts_chunk", "/predict", "predict"]:
|
| 594 |
-
cand = cand.strip()
|
| 595 |
-
if cand and cand not in api_candidates:
|
| 596 |
-
api_candidates.append(cand)
|
| 597 |
|
| 598 |
result: Any = None
|
| 599 |
last_exc: Optional[Exception] = None
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
break
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
continue
|
| 615 |
-
raise
|
| 616 |
if last_exc is not None:
|
| 617 |
available_hint = ""
|
| 618 |
view_api = getattr(client, "view_api", None)
|
|
@@ -757,7 +817,7 @@ class QwenPipelineEngine:
|
|
| 757 |
except Exception as exc:
|
| 758 |
if not HF_TTS_ALLOW_FALLBACK:
|
| 759 |
raise RuntimeError(f"HF Space TTS failed and fallback is disabled: {type(exc).__name__}: {exc}")
|
| 760 |
-
if
|
| 761 |
return write_tone_wav(text, out_path)
|
| 762 |
|
| 763 |
openai_url = f"{_require_api_url()}/audio/speech"
|
|
@@ -808,12 +868,8 @@ class QwenPipelineEngine:
|
|
| 808 |
raise RuntimeError(f"Failed to download TTS audio {audio_resp.status_code}: {audio_resp.text[:500]}")
|
| 809 |
return _save_binary_audio(audio_resp.content, out_path)
|
| 810 |
|
| 811 |
-
def
|
| 812 |
-
if
|
| 813 |
-
if not str(text or "").strip():
|
| 814 |
-
return write_tone_wav("empty", out_path)
|
| 815 |
-
return self._real_tts_single(str(text), out_path, voice=voice)
|
| 816 |
-
chunks = split_text_for_tts(text, max_len=480)
|
| 817 |
if not chunks:
|
| 818 |
return write_tone_wav("empty", out_path)
|
| 819 |
if len(chunks) == 1:
|
|
@@ -825,6 +881,31 @@ class QwenPipelineEngine:
|
|
| 825 |
chunk_paths.append(self._real_tts_single(chunk, chunk_path, voice=voice))
|
| 826 |
return concat_wav_files(chunk_paths, out_path)
|
| 827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
@spaces.GPU
|
| 829 |
def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 830 |
self.ensure_vl_loaded()
|
|
@@ -942,7 +1023,7 @@ class QwenPipelineEngine:
|
|
| 942 |
def synthesize_tts(self, text: str, name_prefix: str = "audio", *, voice: Optional[str] = None) -> str:
|
| 943 |
self.ensure_tts_loaded()
|
| 944 |
out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
|
| 945 |
-
if
|
| 946 |
return write_tone_wav(text, out_path)
|
| 947 |
return self._real_tts(text, out_path, voice=voice)
|
| 948 |
|
|
@@ -1416,6 +1497,7 @@ def reset_ui_from_state(
|
|
| 1416 |
submit_interactive = quiz_ready and not state.get("completed", False)
|
| 1417 |
radio_interactive = submit_interactive
|
| 1418 |
lecture_tts_ready = bool(state.get("lecture_text"))
|
|
|
|
| 1419 |
if state.get("completed"):
|
| 1420 |
radio_interactive = False
|
| 1421 |
return (
|
|
@@ -1428,7 +1510,13 @@ def reset_ui_from_state(
|
|
| 1428 |
gr.update(visible=show_explain_page),
|
| 1429 |
gr.update(visible=show_exam_page),
|
| 1430 |
state.get("status", "Idle"),
|
| 1431 |
-
build_clickable_lecture_html(state.get("lecture_text", "")),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1432 |
state.get("lecture_audio_path", None),
|
| 1433 |
gr.update(interactive=lecture_tts_ready),
|
| 1434 |
gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
|
|
@@ -1465,6 +1553,7 @@ def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any
|
|
| 1465 |
|
| 1466 |
state["lecture_text"] = lecture_text
|
| 1467 |
state["lecture_audio_path"] = None
|
|
|
|
| 1468 |
state["explanation_audio_path"] = None
|
| 1469 |
state["last_explanation_tts_text"] = ""
|
| 1470 |
state["pdf_path"] = pdf_file
|
|
@@ -1712,6 +1801,7 @@ def on_character_change(character_id: str, state: Dict[str, Any]):
|
|
| 1712 |
state["character_id"] = cfg["id"]
|
| 1713 |
state["current_page"] = "explain"
|
| 1714 |
state["lecture_audio_path"] = None
|
|
|
|
| 1715 |
state["explanation_audio_path"] = None
|
| 1716 |
state["last_explanation_tts_text"] = ""
|
| 1717 |
# Keep generated content if user wants to compare, but hide result pages until next generate.
|
|
@@ -1739,14 +1829,26 @@ def tts_voice_for_character(character_id: Optional[str]) -> str:
|
|
| 1739 |
def play_lecture_audio(state: Dict[str, Any]):
|
| 1740 |
if not state.get("lecture_text"):
|
| 1741 |
state["status"] = "No lecture text available."
|
| 1742 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1743 |
backend = _tts_backend_name()
|
| 1744 |
voice = tts_voice_for_character(state.get("character_id"))
|
| 1745 |
try:
|
| 1746 |
state["status"] = f"Generating lecture audio ({backend})..."
|
| 1747 |
state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
|
| 1748 |
state["status"] = "Lecture audio ready."
|
| 1749 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1750 |
except Exception as exc:
|
| 1751 |
state["status"] = "Lecture audio generation failed."
|
| 1752 |
return (
|
|
@@ -1754,6 +1856,7 @@ def play_lecture_audio(state: Dict[str, Any]):
|
|
| 1754 |
state["status"],
|
| 1755 |
state.get("lecture_audio_path"),
|
| 1756 |
f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
|
|
|
|
| 1757 |
)
|
| 1758 |
|
| 1759 |
|
|
@@ -1765,14 +1868,39 @@ def split_lecture_paragraphs(text: str) -> List[str]:
|
|
| 1765 |
return [p.strip() for p in pieces if p and p.strip()]
|
| 1766 |
|
| 1767 |
|
| 1768 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1769 |
paragraphs = split_lecture_paragraphs(lecture_text)
|
| 1770 |
if not paragraphs:
|
| 1771 |
return '<div class="lecture-empty">Generated lecture explanation will appear here...</div>'
|
|
|
|
| 1772 |
parts: List[str] = ['<div class="lecture-clickable">']
|
| 1773 |
for i, p in enumerate(paragraphs):
|
| 1774 |
safe = html.escape(p, quote=False).replace("\n", "<br>")
|
| 1775 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1776 |
parts.append("</div>")
|
| 1777 |
return "".join(parts)
|
| 1778 |
|
|
@@ -1782,7 +1910,13 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
|
|
| 1782 |
paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
|
| 1783 |
if not paragraphs:
|
| 1784 |
state["status"] = "暂无讲解内容。"
|
| 1785 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1786 |
|
| 1787 |
try:
|
| 1788 |
idx = int(str(paragraph_idx or "").strip())
|
|
@@ -1790,11 +1924,18 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
|
|
| 1790 |
idx = -1
|
| 1791 |
if idx < 0 or idx >= len(paragraphs):
|
| 1792 |
state["status"] = "段落选择无效。"
|
| 1793 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1794 |
|
| 1795 |
backend = _tts_backend_name()
|
| 1796 |
voice = tts_voice_for_character(state.get("character_id"))
|
| 1797 |
try:
|
|
|
|
| 1798 |
state["status"] = f"正在生成段落语音({backend})..."
|
| 1799 |
audio_path = engine.synthesize_tts(
|
| 1800 |
paragraphs[idx],
|
|
@@ -1803,10 +1944,23 @@ def play_lecture_paragraph_audio(paragraph_idx: str, state: Dict[str, Any]):
|
|
| 1803 |
)
|
| 1804 |
state["lecture_audio_path"] = audio_path
|
| 1805 |
state["status"] = "段落语音已生成。"
|
| 1806 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1807 |
except Exception as exc:
|
| 1808 |
state["status"] = "段落语音生成失败。"
|
| 1809 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1810 |
|
| 1811 |
|
| 1812 |
def play_explanation_audio(state: Dict[str, Any]):
|
|
@@ -1825,6 +1979,32 @@ def play_explanation_audio(state: Dict[str, Any]):
|
|
| 1825 |
return state, state["status"], state.get("explanation_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
|
| 1826 |
|
| 1827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1828 |
def build_css() -> str:
|
| 1829 |
bg_css = ""
|
| 1830 |
|
|
@@ -2130,8 +2310,16 @@ body {{
|
|
| 2130 |
line-height: 1.45 !important;
|
| 2131 |
color: rgba(244,246,251,0.95) !important;
|
| 2132 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2133 |
#lecture-clickable .lecture-paragraph {{
|
| 2134 |
cursor: pointer;
|
|
|
|
| 2135 |
padding: 10px 12px;
|
| 2136 |
border-radius: 14px;
|
| 2137 |
margin: 0 0 10px 0;
|
|
@@ -2145,6 +2333,18 @@ body {{
|
|
| 2145 |
background: rgba(255,255,255,0.08);
|
| 2146 |
border-color: rgba(255,255,255,0.14);
|
| 2147 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2148 |
.lecture-empty {{
|
| 2149 |
padding: 10px 12px;
|
| 2150 |
color: rgba(244,246,251,0.72);
|
|
@@ -2367,6 +2567,18 @@ body {{
|
|
| 2367 |
background: rgba(15, 23, 42, 0.06);
|
| 2368 |
border-color: rgba(15, 23, 42, 0.16);
|
| 2369 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2370 |
.lecture-empty {{
|
| 2371 |
color: rgba(15, 23, 42, 0.72);
|
| 2372 |
}}
|
|
@@ -2739,7 +2951,7 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2739 |
() => {
|
| 2740 |
const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
|
| 2741 |
if (state.bound) return;
|
| 2742 |
-
|
| 2743 |
const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
|
| 2744 |
const rootCandidates = [
|
| 2745 |
document,
|
|
@@ -2786,30 +2998,112 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2786 |
state.observer = new MutationObserver(() => bindAudioLoading());
|
| 2787 |
state.observer.observe(document.body, { childList: true, subtree: true, attributes: true });
|
| 2788 |
}
|
| 2789 |
-
|
| 2790 |
-
"
|
| 2791 |
-
|
| 2792 |
-
|
| 2793 |
-
|
| 2794 |
-
|
| 2795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2796 |
}
|
| 2797 |
-
|
| 2798 |
-
|
| 2799 |
-
|
| 2800 |
-
|
| 2801 |
-
if (
|
| 2802 |
-
|
| 2803 |
-
|
|
|
|
|
|
|
| 2804 |
}
|
| 2805 |
-
|
| 2806 |
-
|
| 2807 |
-
|
| 2808 |
-
|
| 2809 |
-
|
| 2810 |
-
|
| 2811 |
-
|
| 2812 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2813 |
}
|
| 2814 |
""",
|
| 2815 |
)
|
|
@@ -2827,7 +3121,15 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2827 |
)
|
| 2828 |
with gr.Row(elem_id="lecture-actions"):
|
| 2829 |
play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
|
| 2830 |
-
gr.Markdown("提示:点击
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2831 |
lecture_feedback = gr.Markdown("")
|
| 2832 |
with gr.Row(elem_id="exam-entry-wrap"):
|
| 2833 |
exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
|
|
@@ -2943,6 +3245,7 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2943 |
exam_page,
|
| 2944 |
status_box,
|
| 2945 |
lecture_box,
|
|
|
|
| 2946 |
lecture_audio,
|
| 2947 |
play_lecture_btn,
|
| 2948 |
exam_btn,
|
|
@@ -2969,15 +3272,21 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2969 |
submit_btn.click(fn=submit_answer, inputs=[choice_radio, state], outputs=outputs, show_progress="hidden")
|
| 2970 |
restart_btn.click(fn=restart_quiz, inputs=[state], outputs=outputs, show_progress="hidden")
|
| 2971 |
play_lecture_btn.click(
|
| 2972 |
-
fn=
|
| 2973 |
inputs=[state],
|
| 2974 |
-
outputs=[state, status_box, lecture_audio, lecture_feedback],
|
| 2975 |
show_progress="minimal",
|
| 2976 |
)
|
| 2977 |
play_paragraph_btn.click(
|
| 2978 |
-
fn=
|
| 2979 |
inputs=[paragraph_idx, state],
|
| 2980 |
-
outputs=[state, status_box, lecture_audio, lecture_feedback],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2981 |
show_progress="minimal",
|
| 2982 |
)
|
| 2983 |
|
|
|
|
| 63 |
API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
|
| 64 |
API_KEY = os.getenv("API_KEY", "")
|
| 65 |
USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
|
| 66 |
+
USE_MOCK_TTS = os.getenv("USE_MOCK_TTS", "0") == "1"
|
| 67 |
CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
|
| 68 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 69 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
|
|
|
| 259 |
return {
|
| 260 |
"lecture_text": "",
|
| 261 |
"lecture_audio_path": None,
|
| 262 |
+
"selected_paragraph_idx": "",
|
| 263 |
"explanation_audio_path": None,
|
| 264 |
"last_explanation_tts_text": "",
|
| 265 |
"pdf_path": None,
|
|
|
|
| 418 |
|
| 419 |
|
| 420 |
def _tts_backend_name() -> str:
|
| 421 |
+
if USE_MOCK_TTS:
|
| 422 |
+
return "mock_tts"
|
| 423 |
if _is_hf_tts_enabled():
|
| 424 |
return f"hf_space:{HF_TTS_SPACE_ID or HF_TTS_SPACE_URL}"
|
|
|
|
|
|
|
| 425 |
return "api_tts"
|
| 426 |
|
| 427 |
|
|
|
|
| 494 |
return chunks
|
| 495 |
|
| 496 |
|
| 497 |
+
def split_text_every_two_sentences(text: str, max_len: int = 480) -> List[str]:
|
| 498 |
+
cleaned = re.sub(r"\s+", " ", (text or "")).strip()
|
| 499 |
+
if not cleaned:
|
| 500 |
+
return []
|
| 501 |
+
if len(cleaned) <= max_len:
|
| 502 |
+
return [cleaned]
|
| 503 |
+
|
| 504 |
+
sentences = [s.strip() for s in re.split(r"(?<=[。!?!?;;::\.])\s*", cleaned) if s and s.strip()]
|
| 505 |
+
if not sentences:
|
| 506 |
+
return split_text_for_tts(cleaned, max_len=max_len)
|
| 507 |
+
|
| 508 |
+
groups: List[str] = []
|
| 509 |
+
i = 0
|
| 510 |
+
while i < len(sentences):
|
| 511 |
+
pair = " ".join(sentences[i:i + 2]).strip()
|
| 512 |
+
if pair:
|
| 513 |
+
groups.append(pair)
|
| 514 |
+
i += 2
|
| 515 |
+
|
| 516 |
+
chunks: List[str] = []
|
| 517 |
+
for g in groups:
|
| 518 |
+
if len(g) <= max_len:
|
| 519 |
+
chunks.append(g)
|
| 520 |
+
else:
|
| 521 |
+
chunks.extend(split_text_for_tts(g, max_len=max_len))
|
| 522 |
+
return [c for c in chunks if c and c.strip()]
|
| 523 |
+
|
| 524 |
+
|
| 525 |
def concat_wav_files(wav_paths: List[str], out_path: str) -> str:
|
| 526 |
if not wav_paths:
|
| 527 |
raise RuntimeError("No WAV chunks to concatenate.")
|
|
|
|
| 555 |
PDF -> lecture text -> MCQs -> TTS audio
|
| 556 |
|
| 557 |
This ships with a mock mode by default so the workflow is runnable immediately.
|
| 558 |
+
When USE_MOCK_MODELS=0, it calls remote APIs for text generation.
|
| 559 |
+
TTS mock is controlled separately by USE_MOCK_TTS.
|
| 560 |
- VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
|
| 561 |
- TTS: HF Space /tts_chunk (optional) or DashScope/OpenAI-compatible endpoints
|
| 562 |
"""
|
|
|
|
| 582 |
def ensure_tts_loaded(self) -> None:
|
| 583 |
if self.tts_loaded:
|
| 584 |
return
|
| 585 |
+
if USE_MOCK_TTS:
|
|
|
|
| 586 |
self.tts_loaded = True
|
| 587 |
return
|
| 588 |
+
if _is_hf_tts_enabled():
|
| 589 |
+
self._ensure_hf_tts_client()
|
| 590 |
self.tts_loaded = True
|
| 591 |
return
|
| 592 |
_require_api_url()
|
|
|
|
| 617 |
return self._hf_tts_client
|
| 618 |
|
| 619 |
def _hf_space_tts_single(self, text: str, out_path: str, *, voice: str, language: str) -> str:
|
|
|
|
| 620 |
configured = (HF_TTS_API_NAME or "").strip()
|
| 621 |
normalized = configured.lstrip("/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
result: Any = None
|
| 624 |
last_exc: Optional[Exception] = None
|
| 625 |
+
api_candidates: List[str] = []
|
| 626 |
+
for attempt in range(2):
|
| 627 |
+
client = self._ensure_hf_tts_client()
|
| 628 |
+
api_prefix = ""
|
| 629 |
+
cfg = getattr(client, "config", None)
|
| 630 |
+
if isinstance(cfg, dict):
|
| 631 |
+
api_prefix = str(cfg.get("api_prefix") or "").strip()
|
| 632 |
+
|
| 633 |
+
api_candidates = []
|
| 634 |
+
prefixed = f"{api_prefix.rstrip('/')}/{normalized}" if api_prefix and normalized else ""
|
| 635 |
+
for cand in [
|
| 636 |
+
configured,
|
| 637 |
+
f"/{normalized}" if normalized else "",
|
| 638 |
+
normalized,
|
| 639 |
+
prefixed,
|
| 640 |
+
"/gradio_api/tts_chunk",
|
| 641 |
+
"gradio_api/tts_chunk",
|
| 642 |
+
"/tts_chunk",
|
| 643 |
+
"tts_chunk",
|
| 644 |
+
"/predict",
|
| 645 |
+
"predict",
|
| 646 |
+
]:
|
| 647 |
+
cand = cand.strip()
|
| 648 |
+
if cand and cand not in api_candidates:
|
| 649 |
+
api_candidates.append(cand)
|
| 650 |
+
|
| 651 |
+
result = None
|
| 652 |
+
last_exc = None
|
| 653 |
+
for api_name in api_candidates:
|
| 654 |
+
try:
|
| 655 |
+
result = client.predict(
|
| 656 |
+
text=text,
|
| 657 |
+
voice=voice,
|
| 658 |
+
language=language,
|
| 659 |
+
api_name=api_name,
|
| 660 |
+
)
|
| 661 |
+
last_exc = None
|
| 662 |
+
break
|
| 663 |
+
except Exception as exc:
|
| 664 |
+
msg = str(exc)
|
| 665 |
+
lower_msg = msg.lower()
|
| 666 |
+
if ("cannot find a function" in lower_msg) and ("api_name" in lower_msg):
|
| 667 |
+
last_exc = exc
|
| 668 |
+
continue
|
| 669 |
+
raise
|
| 670 |
+
if last_exc is None:
|
| 671 |
break
|
| 672 |
+
# Refresh cached client once in case the upstream app reloaded and endpoints changed.
|
| 673 |
+
if attempt == 0:
|
| 674 |
+
self._hf_tts_client = None
|
| 675 |
+
|
|
|
|
|
|
|
| 676 |
if last_exc is not None:
|
| 677 |
available_hint = ""
|
| 678 |
view_api = getattr(client, "view_api", None)
|
|
|
|
| 817 |
except Exception as exc:
|
| 818 |
if not HF_TTS_ALLOW_FALLBACK:
|
| 819 |
raise RuntimeError(f"HF Space TTS failed and fallback is disabled: {type(exc).__name__}: {exc}")
|
| 820 |
+
if USE_MOCK_TTS:
|
| 821 |
return write_tone_wav(text, out_path)
|
| 822 |
|
| 823 |
openai_url = f"{_require_api_url()}/audio/speech"
|
|
|
|
| 868 |
raise RuntimeError(f"Failed to download TTS audio {audio_resp.status_code}: {audio_resp.text[:500]}")
|
| 869 |
return _save_binary_audio(audio_resp.content, out_path)
|
| 870 |
|
| 871 |
+
def _synthesize_tts_chunks(self, chunks: List[str], out_path: str, *, voice: Optional[str] = None) -> str:
|
| 872 |
+
chunks = [str(c or "").strip() for c in chunks if str(c or "").strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 873 |
if not chunks:
|
| 874 |
return write_tone_wav("empty", out_path)
|
| 875 |
if len(chunks) == 1:
|
|
|
|
| 881 |
chunk_paths.append(self._real_tts_single(chunk, chunk_path, voice=voice))
|
| 882 |
return concat_wav_files(chunk_paths, out_path)
|
| 883 |
|
| 884 |
+
def _real_tts(self, text: str, out_path: str, *, voice: Optional[str] = None) -> str:
|
| 885 |
+
cleaned = str(text or "").strip()
|
| 886 |
+
if not cleaned:
|
| 887 |
+
return write_tone_wav("empty", out_path)
|
| 888 |
+
|
| 889 |
+
if TEXT_SPLIT_TO_CHUNK:
|
| 890 |
+
return self._synthesize_tts_chunks(split_text_for_tts(cleaned, max_len=480), out_path, voice=voice)
|
| 891 |
+
|
| 892 |
+
try:
|
| 893 |
+
return self._real_tts_single(cleaned, out_path, voice=voice)
|
| 894 |
+
except Exception as exc:
|
| 895 |
+
err = str(exc).lower()
|
| 896 |
+
too_long = (
|
| 897 |
+
"text too long" in err
|
| 898 |
+
or "too long for chunk-level api" in err
|
| 899 |
+
or "chunk-level api" in err
|
| 900 |
+
)
|
| 901 |
+
if not too_long:
|
| 902 |
+
raise
|
| 903 |
+
return self._synthesize_tts_chunks(
|
| 904 |
+
split_text_every_two_sentences(cleaned, max_len=480),
|
| 905 |
+
out_path,
|
| 906 |
+
voice=voice,
|
| 907 |
+
)
|
| 908 |
+
|
| 909 |
@spaces.GPU
|
| 910 |
def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 911 |
self.ensure_vl_loaded()
|
|
|
|
| 1023 |
def synthesize_tts(self, text: str, name_prefix: str = "audio", *, voice: Optional[str] = None) -> str:
|
| 1024 |
self.ensure_tts_loaded()
|
| 1025 |
out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
|
| 1026 |
+
if USE_MOCK_TTS:
|
| 1027 |
return write_tone_wav(text, out_path)
|
| 1028 |
return self._real_tts(text, out_path, voice=voice)
|
| 1029 |
|
|
|
|
| 1497 |
submit_interactive = quiz_ready and not state.get("completed", False)
|
| 1498 |
radio_interactive = submit_interactive
|
| 1499 |
lecture_tts_ready = bool(state.get("lecture_text"))
|
| 1500 |
+
selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
|
| 1501 |
if state.get("completed"):
|
| 1502 |
radio_interactive = False
|
| 1503 |
return (
|
|
|
|
| 1510 |
gr.update(visible=show_explain_page),
|
| 1511 |
gr.update(visible=show_exam_page),
|
| 1512 |
state.get("status", "Idle"),
|
| 1513 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1514 |
+
gr.update(
|
| 1515 |
+
choices=paragraph_picker_choices(state.get("lecture_text", "")),
|
| 1516 |
+
value=selected_paragraph_value,
|
| 1517 |
+
interactive=lecture_tts_ready,
|
| 1518 |
+
visible=lecture_tts_ready,
|
| 1519 |
+
),
|
| 1520 |
state.get("lecture_audio_path", None),
|
| 1521 |
gr.update(interactive=lecture_tts_ready),
|
| 1522 |
gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
|
|
|
|
| 1553 |
|
| 1554 |
state["lecture_text"] = lecture_text
|
| 1555 |
state["lecture_audio_path"] = None
|
| 1556 |
+
state["selected_paragraph_idx"] = ""
|
| 1557 |
state["explanation_audio_path"] = None
|
| 1558 |
state["last_explanation_tts_text"] = ""
|
| 1559 |
state["pdf_path"] = pdf_file
|
|
|
|
| 1801 |
state["character_id"] = cfg["id"]
|
| 1802 |
state["current_page"] = "explain"
|
| 1803 |
state["lecture_audio_path"] = None
|
| 1804 |
+
state["selected_paragraph_idx"] = ""
|
| 1805 |
state["explanation_audio_path"] = None
|
| 1806 |
state["last_explanation_tts_text"] = ""
|
| 1807 |
# Keep generated content if user wants to compare, but hide result pages until next generate.
|
|
|
|
| 1829 |
def play_lecture_audio(state: Dict[str, Any]):
|
| 1830 |
if not state.get("lecture_text"):
|
| 1831 |
state["status"] = "No lecture text available."
|
| 1832 |
+
return (
|
| 1833 |
+
state,
|
| 1834 |
+
state["status"],
|
| 1835 |
+
state.get("lecture_audio_path"),
|
| 1836 |
+
"Generate lecture first.",
|
| 1837 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1838 |
+
)
|
| 1839 |
backend = _tts_backend_name()
|
| 1840 |
voice = tts_voice_for_character(state.get("character_id"))
|
| 1841 |
try:
|
| 1842 |
state["status"] = f"Generating lecture audio ({backend})..."
|
| 1843 |
state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture", voice=voice)
|
| 1844 |
state["status"] = "Lecture audio ready."
|
| 1845 |
+
return (
|
| 1846 |
+
state,
|
| 1847 |
+
state["status"],
|
| 1848 |
+
state["lecture_audio_path"],
|
| 1849 |
+
f"Lecture audio generated via `{backend}`.",
|
| 1850 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1851 |
+
)
|
| 1852 |
except Exception as exc:
|
| 1853 |
state["status"] = "Lecture audio generation failed."
|
| 1854 |
return (
|
|
|
|
| 1856 |
state["status"],
|
| 1857 |
state.get("lecture_audio_path"),
|
| 1858 |
f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
|
| 1859 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1860 |
)
|
| 1861 |
|
| 1862 |
|
|
|
|
| 1868 |
return [p.strip() for p in pieces if p and p.strip()]
|
| 1869 |
|
| 1870 |
|
| 1871 |
+
def paragraph_picker_choices(lecture_text: str) -> List[tuple[str, str]]:
|
| 1872 |
+
paragraphs = split_lecture_paragraphs(lecture_text)
|
| 1873 |
+
choices: List[tuple[str, str]] = []
|
| 1874 |
+
for i, p in enumerate(paragraphs):
|
| 1875 |
+
preview = re.sub(r"\s+", " ", str(p or "")).strip()
|
| 1876 |
+
if len(preview) > 110:
|
| 1877 |
+
preview = preview[:107].rstrip() + "..."
|
| 1878 |
+
choices.append((f"Chunk {i + 1}: {preview}", str(i)))
|
| 1879 |
+
return choices
|
| 1880 |
+
|
| 1881 |
+
|
| 1882 |
+
def build_clickable_lecture_html(lecture_text: str, selected_idx: str = "") -> str:
|
| 1883 |
paragraphs = split_lecture_paragraphs(lecture_text)
|
| 1884 |
if not paragraphs:
|
| 1885 |
return '<div class="lecture-empty">Generated lecture explanation will appear here...</div>'
|
| 1886 |
+
selected = str(selected_idx or "").strip()
|
| 1887 |
parts: List[str] = ['<div class="lecture-clickable">']
|
| 1888 |
for i, p in enumerate(paragraphs):
|
| 1889 |
safe = html.escape(p, quote=False).replace("\n", "<br>")
|
| 1890 |
+
selected_cls = " is-selected" if selected and selected == str(i) else ""
|
| 1891 |
+
selected_style = (
|
| 1892 |
+
"background: #f97316 !important; "
|
| 1893 |
+
"border-color: #f97316 !important; "
|
| 1894 |
+
"box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important; "
|
| 1895 |
+
"color: #ffffff !important;"
|
| 1896 |
+
if selected_cls
|
| 1897 |
+
else ""
|
| 1898 |
+
)
|
| 1899 |
+
parts.append(
|
| 1900 |
+
f'<div class="lecture-paragraph{selected_cls}" data-idx="{i}" '
|
| 1901 |
+
f'style="{selected_style}" '
|
| 1902 |
+
f'onclick="window.__lectureSelectParagraph && window.__lectureSelectParagraph({i}, this, true);">{safe}</div>'
|
| 1903 |
+
)
|
| 1904 |
parts.append("</div>")
|
| 1905 |
return "".join(parts)
|
| 1906 |
|
|
|
|
| 1910 |
paragraphs = split_lecture_paragraphs(str(lecture_text or ""))
|
| 1911 |
if not paragraphs:
|
| 1912 |
state["status"] = "暂无讲解内容。"
|
| 1913 |
+
return (
|
| 1914 |
+
state,
|
| 1915 |
+
state.get("status", "Idle"),
|
| 1916 |
+
state.get("lecture_audio_path"),
|
| 1917 |
+
"请先生成讲解。",
|
| 1918 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1919 |
+
)
|
| 1920 |
|
| 1921 |
try:
|
| 1922 |
idx = int(str(paragraph_idx or "").strip())
|
|
|
|
| 1924 |
idx = -1
|
| 1925 |
if idx < 0 or idx >= len(paragraphs):
|
| 1926 |
state["status"] = "段落选择无效。"
|
| 1927 |
+
return (
|
| 1928 |
+
state,
|
| 1929 |
+
state.get("status", "Idle"),
|
| 1930 |
+
state.get("lecture_audio_path"),
|
| 1931 |
+
"请重新点击要播放的段落。",
|
| 1932 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1933 |
+
)
|
| 1934 |
|
| 1935 |
backend = _tts_backend_name()
|
| 1936 |
voice = tts_voice_for_character(state.get("character_id"))
|
| 1937 |
try:
|
| 1938 |
+
state["selected_paragraph_idx"] = str(idx)
|
| 1939 |
state["status"] = f"正在生成段落语音({backend})..."
|
| 1940 |
audio_path = engine.synthesize_tts(
|
| 1941 |
paragraphs[idx],
|
|
|
|
| 1944 |
)
|
| 1945 |
state["lecture_audio_path"] = audio_path
|
| 1946 |
state["status"] = "段落语音已生成。"
|
| 1947 |
+
char_len = len(paragraphs[idx])
|
| 1948 |
+
return (
|
| 1949 |
+
state,
|
| 1950 |
+
state["status"],
|
| 1951 |
+
audio_path,
|
| 1952 |
+
f"已生成第 {idx+1}/{len(paragraphs)} 段语音({char_len} 字符),可在下方播放。",
|
| 1953 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1954 |
+
)
|
| 1955 |
except Exception as exc:
|
| 1956 |
state["status"] = "段落语音生成失败。"
|
| 1957 |
+
return (
|
| 1958 |
+
state,
|
| 1959 |
+
state["status"],
|
| 1960 |
+
state.get("lecture_audio_path"),
|
| 1961 |
+
f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
|
| 1962 |
+
build_clickable_lecture_html(state.get("lecture_text", ""), str(state.get("selected_paragraph_idx", ""))),
|
| 1963 |
+
)
|
| 1964 |
|
| 1965 |
|
| 1966 |
def play_explanation_audio(state: Dict[str, Any]):
|
|
|
|
| 1979 |
return state, state["status"], state.get("explanation_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
|
| 1980 |
|
| 1981 |
|
| 1982 |
+
def on_play_lecture_audio_click(state: Dict[str, Any]):
|
| 1983 |
+
state, status, audio_path, feedback, lecture_html = play_lecture_audio(state)
|
| 1984 |
+
selected_paragraph_value = str(state.get("selected_paragraph_idx", "")).strip() or None
|
| 1985 |
+
return (
|
| 1986 |
+
state,
|
| 1987 |
+
status,
|
| 1988 |
+
audio_path,
|
| 1989 |
+
feedback,
|
| 1990 |
+
lecture_html,
|
| 1991 |
+
gr.update(value=selected_paragraph_value),
|
| 1992 |
+
)
|
| 1993 |
+
|
| 1994 |
+
|
| 1995 |
+
def on_play_paragraph_click(paragraph_idx: str, state: Dict[str, Any]):
    """Gradio callback: generate TTS for one lecture paragraph.

    Thin wrapper around ``play_lecture_paragraph_audio`` that appends a
    ``gr.update`` so the fallback paragraph selector mirrors the selection
    recorded in session state (``None`` when no paragraph is selected).
    """
    outputs = play_lecture_paragraph_audio(paragraph_idx, state)
    state, status, audio_path, feedback, lecture_html = outputs
    raw_idx = str(state.get("selected_paragraph_idx", "")).strip()
    return (
        state,
        status,
        audio_path,
        feedback,
        lecture_html,
        gr.update(value=raw_idx if raw_idx else None),
    )
|
| 2006 |
+
|
| 2007 |
+
|
| 2008 |
def build_css() -> str:
|
| 2009 |
bg_css = ""
|
| 2010 |
|
|
|
|
| 2310 |
line-height: 1.45 !important;
|
| 2311 |
color: rgba(244,246,251,0.95) !important;
|
| 2312 |
}}
|
| 2313 |
+
#lecture-clickable,
|
| 2314 |
+
#lecture-clickable .html-container,
|
| 2315 |
+
#lecture-clickable .html-container *,
|
| 2316 |
+
#lecture-clickable .lecture-clickable,
|
| 2317 |
+
#lecture-clickable .lecture-clickable * {{
|
| 2318 |
+
pointer-events: auto !important;
|
| 2319 |
+
}}
|
| 2320 |
#lecture-clickable .lecture-paragraph {{
|
| 2321 |
cursor: pointer;
|
| 2322 |
+
pointer-events: auto !important;
|
| 2323 |
padding: 10px 12px;
|
| 2324 |
border-radius: 14px;
|
| 2325 |
margin: 0 0 10px 0;
|
|
|
|
| 2333 |
background: rgba(255,255,255,0.08);
|
| 2334 |
border-color: rgba(255,255,255,0.14);
|
| 2335 |
}}
|
| 2336 |
+
#lecture-clickable .lecture-paragraph.is-selected {{
|
| 2337 |
+
background: #f97316 !important;
|
| 2338 |
+
border-color: #f97316 !important;
|
| 2339 |
+
box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
|
| 2340 |
+
color: #ffffff !important;
|
| 2341 |
+
}}
|
| 2342 |
+
#lecture-clickable .lecture-paragraph[data-selected="1"] {{
|
| 2343 |
+
background: #f97316 !important;
|
| 2344 |
+
border-color: #f97316 !important;
|
| 2345 |
+
box-shadow: 0 0 0 1px rgba(255,255,255,0.16) inset !important;
|
| 2346 |
+
color: #ffffff !important;
|
| 2347 |
+
}}
|
| 2348 |
.lecture-empty {{
|
| 2349 |
padding: 10px 12px;
|
| 2350 |
color: rgba(244,246,251,0.72);
|
|
|
|
| 2567 |
background: rgba(15, 23, 42, 0.06);
|
| 2568 |
border-color: rgba(15, 23, 42, 0.16);
|
| 2569 |
}}
|
| 2570 |
+
#lecture-clickable .lecture-paragraph.is-selected {{
|
| 2571 |
+
background: #f97316 !important;
|
| 2572 |
+
border-color: #f97316 !important;
|
| 2573 |
+
box-shadow: 0 0 0 1px rgba(255,255,255,0.18) inset !important;
|
| 2574 |
+
color: #ffffff !important;
|
| 2575 |
+
}}
|
| 2576 |
+
#lecture-clickable .lecture-paragraph[data-selected="1"] {{
|
| 2577 |
+
background: #f97316 !important;
|
| 2578 |
+
border-color: #f97316 !important;
|
| 2579 |
+
box-shadow: 0 0 0 1px rgba(255,255,255,0.18) inset !important;
|
| 2580 |
+
color: #ffffff !important;
|
| 2581 |
+
}}
|
| 2582 |
.lecture-empty {{
|
| 2583 |
color: rgba(15, 23, 42, 0.72);
|
| 2584 |
}}
|
|
|
|
| 2951 |
() => {
|
| 2952 |
const state = window.__lectureClickTtsGlobal || (window.__lectureClickTtsGlobal = {});
|
| 2953 |
if (state.bound) return;
|
| 2954 |
+
try {
|
| 2955 |
const grRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
|
| 2956 |
const rootCandidates = [
|
| 2957 |
document,
|
|
|
|
| 2998 |
state.observer = new MutationObserver(() => bindAudioLoading());
|
| 2999 |
state.observer.observe(document.body, { childList: true, subtree: true, attributes: true });
|
| 3000 |
}
|
| 3001 |
+
// Mark `para` (paragraph index `idx`) as the selected lecture paragraph,
// mirror the index into the #selected-paragraph input, and optionally click
// the paragraph-play button to start TTS.
// NOTE(review): depends on `rootCandidates`, `q`, and `showLoading` from the
// enclosing closure (defined outside this excerpt) — verify they are in scope.
const selectParagraph = (idx, para, autoPlay) => {
    const indexText = String(idx ?? "").trim();
    // Inline copies of the .is-selected CSS, applied with !important so the
    // highlight survives theme/stylesheet overrides.
    const selectedInlineStyle = {
        background: "#f97316",
        borderColor: "#f97316",
        boxShadow: "0 0 0 1px rgba(255,255,255,0.16) inset",
        color: "#ffffff",
    };
    // Step 1: clear any previous selection in every candidate root
    // (document and, presumably, the Gradio shadow root).
    for (const r of rootCandidates) {
        const nodes = r.querySelectorAll ? r.querySelectorAll("#lecture-clickable .lecture-paragraph.is-selected") : [];
        for (const node of nodes) {
            node.classList.remove("is-selected");
            node.removeAttribute("data-selected");
            if (node.style) {
                // Drop the inline overrides too, not just the class.
                node.style.removeProperty("background");
                node.style.removeProperty("border-color");
                node.style.removeProperty("box-shadow");
                node.style.removeProperty("color");
            }
        }
    }
    // Step 2: highlight the newly selected paragraph (class + data attribute
    // + inline styles, so all three CSS hooks agree).
    if (para && para.classList) {
        para.classList.add("is-selected");
        para.setAttribute("data-selected", "1");
        if (para.style) {
            para.style.setProperty("background", selectedInlineStyle.background, "important");
            para.style.setProperty("border-color", selectedInlineStyle.borderColor, "important");
            para.style.setProperty("box-shadow", selectedInlineStyle.boxShadow, "important");
            para.style.setProperty("color", selectedInlineStyle.color, "important");
        }
    }

    // Step 3: push the index into the #selected-paragraph text input and fire
    // synthetic input/change events so framework listeners (presumably
    // Gradio's) see the new value.
    let input = q("#selected-paragraph textarea, #selected-paragraph input");
    if (!input) {
        const inputWrap = q("#selected-paragraph");
        input = inputWrap && inputWrap.querySelector ? inputWrap.querySelector("textarea, input") : null;
    }
    if (!input) {
        showLoading("未找到段落选择控件,请刷新页面重试。");
        return;
    }
    input.value = indexText;
    input.dispatchEvent(new Event("input", { bubbles: true }));
    input.dispatchEvent(new Event("change", { bubbles: true }));

    // Step 4 (optional): click the paragraph-play button to trigger TTS.
    if (!autoPlay) return;
    let btn = q("#play-paragraph-btn button, #play-paragraph-btn");
    // The selector may match the wrapper div; descend to the real <button>.
    if (btn && btn.querySelector && btn.tagName !== "BUTTON") {
        const innerBtn = btn.querySelector("button");
        if (innerBtn) btn = innerBtn;
    }
    if (!btn) {
        showLoading("未找到段落播放控件,请刷新页面重试。");
        return;
    }
    showLoading("正在生成语音...");
    btn.click();
};
// Public bridge so inline onclick handlers (or other scripts) can select a
// paragraph; autoPlay defaults to true.
window.__lectureSelectParagraph = (idx, el, autoPlay = true) => {
    selectParagraph(idx, el, autoPlay);
};
|
| 3062 |
+
|
| 3063 |
+
// Resolve the .lecture-paragraph element behind a DOM event: try the normal
// ancestor chain first, then fall back to the composed (shadow-DOM) path.
// Returns null when the event did not originate inside a paragraph.
const paragraphFromEvent = (evt) => {
    const tgt = evt ? evt.target : null;
    if (tgt && tgt.nodeType === 1 && tgt.closest) {
        const hit = tgt.closest(".lecture-paragraph");
        if (hit) return hit;
    }
    const composed = (evt && typeof evt.composedPath === "function") ? evt.composedPath() : [];
    for (const node of composed) {
        if (node && node.classList && node.classList.contains("lecture-paragraph")) {
            return node;
        }
    }
    return null;
};
|
| 3075 |
+
|
| 3076 |
+
// Delegated click handler: find the paragraph under the pointer, read its
// data-idx, and select + auto-play it. Ignores clicks outside paragraphs and
// paragraphs without a usable index.
const onParagraphClick = (evt) => {
    const para = paragraphFromEvent(evt);
    if (!para) return;
    const idxAttr = para.getAttribute("data-idx");
    if (typeof idxAttr !== "string" || idxAttr.trim() === "") return;
    selectParagraph(idxAttr, para, true);
};
|
| 3083 |
+
// Attach the capture-phase paragraph click listener to a root (document,
// shadow root, or window) exactly once; a marker property on the root guards
// against double binding.
const bindClickRoot = (target) => {
    if (!(target && target.addEventListener) || target.__lectureClickBound) return;
    target.__lectureClickBound = true;
    target.addEventListener("click", onParagraphClick, true);
};
|
| 3089 |
+
|
| 3090 |
+
for (const r of rootCandidates) bindClickRoot(r);
|
| 3091 |
+
bindClickRoot(window);
|
| 3092 |
+
|
| 3093 |
+
if (!state.rebindObserver) {
|
| 3094 |
+
state.rebindObserver = new MutationObserver(() => {
|
| 3095 |
+
const nextRoot = (typeof window.gradioApp === "function") ? window.gradioApp() : null;
|
| 3096 |
+
for (const r of [document, nextRoot && nextRoot.shadowRoot ? nextRoot.shadowRoot : null, nextRoot]) {
|
| 3097 |
+
bindClickRoot(r);
|
| 3098 |
+
}
|
| 3099 |
+
});
|
| 3100 |
+
state.rebindObserver.observe(document.body, { childList: true, subtree: true });
|
| 3101 |
+
}
|
| 3102 |
+
state.bound = true;
|
| 3103 |
+
} catch (err) {
|
| 3104 |
+
state.bound = false;
|
| 3105 |
+
try { console.error("lecture click bridge failed:", err); } catch (_) {}
|
| 3106 |
+
}
|
| 3107 |
}
|
| 3108 |
""",
|
| 3109 |
)
|
|
|
|
| 3121 |
)
|
| 3122 |
with gr.Row(elem_id="lecture-actions"):
|
| 3123 |
play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
|
| 3124 |
+
gr.Markdown("提示:可直接点击段落播放;若浏览器拦截点击,请使用下方 Chunk selector。", elem_id="paragraph-tts-tip")
|
| 3125 |
+
paragraph_picker = gr.Radio(
|
| 3126 |
+
choices=[],
|
| 3127 |
+
value=None,
|
| 3128 |
+
interactive=False,
|
| 3129 |
+
visible=False,
|
| 3130 |
+
label="Chunks (fallback selector)",
|
| 3131 |
+
elem_id="paragraph-picker",
|
| 3132 |
+
)
|
| 3133 |
lecture_feedback = gr.Markdown("")
|
| 3134 |
with gr.Row(elem_id="exam-entry-wrap"):
|
| 3135 |
exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
|
|
|
|
| 3245 |
exam_page,
|
| 3246 |
status_box,
|
| 3247 |
lecture_box,
|
| 3248 |
+
paragraph_picker,
|
| 3249 |
lecture_audio,
|
| 3250 |
play_lecture_btn,
|
| 3251 |
exam_btn,
|
|
|
|
| 3272 |
submit_btn.click(fn=submit_answer, inputs=[choice_radio, state], outputs=outputs, show_progress="hidden")
|
| 3273 |
restart_btn.click(fn=restart_quiz, inputs=[state], outputs=outputs, show_progress="hidden")
|
| 3274 |
play_lecture_btn.click(
|
| 3275 |
+
fn=on_play_lecture_audio_click,
|
| 3276 |
inputs=[state],
|
| 3277 |
+
outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
|
| 3278 |
show_progress="minimal",
|
| 3279 |
)
|
| 3280 |
play_paragraph_btn.click(
|
| 3281 |
+
fn=on_play_paragraph_click,
|
| 3282 |
inputs=[paragraph_idx, state],
|
| 3283 |
+
outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
|
| 3284 |
+
show_progress="minimal",
|
| 3285 |
+
)
|
| 3286 |
+
paragraph_picker.change(
|
| 3287 |
+
fn=on_play_paragraph_click,
|
| 3288 |
+
inputs=[paragraph_picker, state],
|
| 3289 |
+
outputs=[state, status_box, lecture_audio, lecture_feedback, lecture_box, paragraph_picker],
|
| 3290 |
show_progress="minimal",
|
| 3291 |
)
|
| 3292 |
|