Jiaxuan Yang commited on
Commit ·
f718c5e
1
Parent(s): c19cf53
Fixed UI
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import base64
|
| 2 |
import html
|
| 3 |
-
import io
|
| 4 |
import json
|
| 5 |
import math
|
| 6 |
import os
|
|
@@ -35,11 +34,6 @@ try:
|
|
| 35 |
except Exception: # pragma: no cover
|
| 36 |
pdfium = None # type: ignore
|
| 37 |
|
| 38 |
-
try:
|
| 39 |
-
import soundfile as sf # type: ignore
|
| 40 |
-
except Exception: # pragma: no cover
|
| 41 |
-
sf = None # type: ignore
|
| 42 |
-
|
| 43 |
|
| 44 |
APP_DIR = Path(__file__).parent.resolve()
|
| 45 |
TMP_DIR = APP_DIR / "tmp_outputs"
|
|
@@ -68,41 +62,11 @@ CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
|
|
| 68 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 69 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
| 70 |
TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
|
| 71 |
-
TTS_BACKEND = (os.getenv("TTS_BACKEND") or "gpt_sovits_local").strip().lower()
|
| 72 |
API_TIMEOUT_SEC = int(os.getenv("API_TIMEOUT_SEC", "180"))
|
| 73 |
QWEN_VL_MAX_PAGES = int(os.getenv("QWEN_VL_MAX_PAGES", "4"))
|
| 74 |
QWEN_VL_RENDER_SCALE = float(os.getenv("QWEN_VL_RENDER_SCALE", "1.5"))
|
| 75 |
QWEN_VL_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MAX_NEW_TOKENS", "800"))
|
| 76 |
QWEN_VL_MCQ_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MCQ_MAX_NEW_TOKENS", "1800"))
|
| 77 |
-
GPT_SOVITS_BASE_URL = (os.getenv("GPT_SOVITS_BASE_URL") or "http://127.0.0.1:9880").rstrip("/")
|
| 78 |
-
GPT_SOVITS_TTS_ENDPOINT = os.getenv("GPT_SOVITS_TTS_ENDPOINT", "/tts")
|
| 79 |
-
GPT_SOVITS_SET_SOVITS_ENDPOINT = os.getenv("GPT_SOVITS_SET_SOVITS_ENDPOINT", "/set_sovits_weights")
|
| 80 |
-
GPT_SOVITS_SET_GPT_ENDPOINT = os.getenv("GPT_SOVITS_SET_GPT_ENDPOINT", "/set_gpt_weights")
|
| 81 |
-
GPT_SOVITS_DEFAULT_SOVITS_PATH = os.getenv(
|
| 82 |
-
"GPT_SOVITS_DEFAULT_SOVITS_PATH",
|
| 83 |
-
str((APP_DIR / "audio" / "s2Gv2ProPlus.pth").resolve()),
|
| 84 |
-
)
|
| 85 |
-
GPT_SOVITS_DEFAULT_GPT_PATH = os.getenv("GPT_SOVITS_DEFAULT_GPT_PATH", "")
|
| 86 |
-
GPT_SOVITS_DEFAULT_REF_AUDIO_PATH = os.getenv("GPT_SOVITS_REF_AUDIO_PATH", "")
|
| 87 |
-
GPT_SOVITS_DEFAULT_PROMPT_TEXT = os.getenv("GPT_SOVITS_PROMPT_TEXT", "")
|
| 88 |
-
GPT_SOVITS_DEFAULT_PROMPT_LANG = os.getenv("GPT_SOVITS_PROMPT_LANG", "zh")
|
| 89 |
-
GPT_SOVITS_DEFAULT_TEXT_LANG = os.getenv("GPT_SOVITS_TEXT_LANG", "zh")
|
| 90 |
-
GPT_SOVITS_MEDIA_TYPE = os.getenv("GPT_SOVITS_MEDIA_TYPE", "wav")
|
| 91 |
-
GPT_SOVITS_STREAMING_MODE = os.getenv("GPT_SOVITS_STREAMING_MODE", "0").strip() == "1"
|
| 92 |
-
GPT_SOVITS_ROLE_MODEL_MAP_RAW = os.getenv("GPT_SOVITS_ROLE_MODEL_MAP", "")
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
def _parse_json_dict_env(raw: str) -> Dict[str, Any]:
|
| 96 |
-
if not raw.strip():
|
| 97 |
-
return {}
|
| 98 |
-
try:
|
| 99 |
-
data = json.loads(raw)
|
| 100 |
-
except Exception:
|
| 101 |
-
return {}
|
| 102 |
-
return data if isinstance(data, dict) else {}
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
GPT_SOVITS_ROLE_MODEL_MAP = _parse_json_dict_env(GPT_SOVITS_ROLE_MODEL_MAP_RAW)
|
| 106 |
|
| 107 |
|
| 108 |
DEFAULT_LECTURE_PROMPT_TEMPLATE = """
|
|
@@ -221,14 +185,6 @@ def load_character_configs() -> Dict[str, Dict[str, Any]]:
|
|
| 221 |
d / str(meta.get("mcq_retry_prompt_file", "mcq_retry_prompt.txt")),
|
| 222 |
DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
|
| 223 |
),
|
| 224 |
-
# Optional local GPT-SoVITS overrides.
|
| 225 |
-
"voice_model": str(meta.get("voice_model", meta.get("display_name", cid))),
|
| 226 |
-
"gpt_sovits_sovits_path": str(meta.get("gpt_sovits_sovits_path", "")).strip(),
|
| 227 |
-
"gpt_sovits_gpt_path": str(meta.get("gpt_sovits_gpt_path", "")).strip(),
|
| 228 |
-
"gpt_sovits_ref_audio_path": str(meta.get("gpt_sovits_ref_audio_path", "")).strip(),
|
| 229 |
-
"gpt_sovits_prompt_text": str(meta.get("gpt_sovits_prompt_text", "")).strip(),
|
| 230 |
-
"gpt_sovits_prompt_lang": str(meta.get("gpt_sovits_prompt_lang", "")).strip(),
|
| 231 |
-
"gpt_sovits_text_lang": str(meta.get("gpt_sovits_text_lang", "")).strip(),
|
| 232 |
}
|
| 233 |
configs[cid] = config
|
| 234 |
|
|
@@ -245,13 +201,6 @@ def load_character_configs() -> Dict[str, Dict[str, Any]]:
|
|
| 245 |
"lecture_prompt_template": DEFAULT_LECTURE_PROMPT_TEMPLATE,
|
| 246 |
"mcq_prompt_template": DEFAULT_MCQ_PROMPT_TEMPLATE,
|
| 247 |
"mcq_retry_prompt_template": DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
|
| 248 |
-
"voice_model": "default",
|
| 249 |
-
"gpt_sovits_sovits_path": "",
|
| 250 |
-
"gpt_sovits_gpt_path": "",
|
| 251 |
-
"gpt_sovits_ref_audio_path": "",
|
| 252 |
-
"gpt_sovits_prompt_text": "",
|
| 253 |
-
"gpt_sovits_prompt_lang": "",
|
| 254 |
-
"gpt_sovits_text_lang": "",
|
| 255 |
}
|
| 256 |
return configs
|
| 257 |
|
|
@@ -266,45 +215,6 @@ def get_character_config(character_id: Optional[str]) -> Dict[str, Any]:
|
|
| 266 |
return CHARACTER_CONFIGS[DEFAULT_CHARACTER_ID]
|
| 267 |
|
| 268 |
|
| 269 |
-
def normalize_role_key(value: Optional[str]) -> str:
|
| 270 |
-
s = str(value or "").strip().lower()
|
| 271 |
-
return re.sub(r"[^a-z0-9]+", "", s)
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
def build_role_aliases(character_id: Optional[str], character_cfg: Optional[Dict[str, Any]] = None) -> List[str]:
|
| 275 |
-
raws: List[str] = []
|
| 276 |
-
if character_id:
|
| 277 |
-
raws.append(character_id)
|
| 278 |
-
if character_cfg:
|
| 279 |
-
for k in ["id", "display_name", "chat_label", "voice_model"]:
|
| 280 |
-
v = character_cfg.get(k)
|
| 281 |
-
if isinstance(v, str) and v.strip():
|
| 282 |
-
raws.append(v.strip())
|
| 283 |
-
seen: set[str] = set()
|
| 284 |
-
out: List[str] = []
|
| 285 |
-
for raw in raws:
|
| 286 |
-
for candidate in [raw, normalize_role_key(raw)]:
|
| 287 |
-
if not candidate or candidate in seen:
|
| 288 |
-
continue
|
| 289 |
-
seen.add(candidate)
|
| 290 |
-
out.append(candidate)
|
| 291 |
-
return out
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
def resolve_local_path_maybe(value: Optional[str]) -> str:
|
| 295 |
-
s = str(value or "").strip()
|
| 296 |
-
if not s:
|
| 297 |
-
return ""
|
| 298 |
-
p = Path(s).expanduser()
|
| 299 |
-
if not p.is_absolute():
|
| 300 |
-
p = (APP_DIR / p).resolve()
|
| 301 |
-
return str(p)
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
def file_exists(path_str: Optional[str]) -> bool:
|
| 305 |
-
return bool(path_str) and Path(str(path_str)).expanduser().exists()
|
| 306 |
-
|
| 307 |
-
|
| 308 |
@dataclass
|
| 309 |
class MCQItem:
|
| 310 |
question: str
|
|
@@ -551,12 +461,9 @@ class QwenPipelineEngine:
|
|
| 551 |
|
| 552 |
def __init__(self) -> None:
|
| 553 |
self.mock_mode = USE_MOCK_MODELS
|
| 554 |
-
self.tts_backend = TTS_BACKEND
|
| 555 |
self.vl_loaded = False
|
| 556 |
self.tts_loaded = False
|
| 557 |
self._pdf_page_cache: Dict[str, List[str]] = {}
|
| 558 |
-
self._loaded_sovits_weights: Optional[str] = None
|
| 559 |
-
self._loaded_gpt_weights: Optional[str] = None
|
| 560 |
|
| 561 |
def ensure_vl_loaded(self) -> None:
|
| 562 |
if self.vl_loaded:
|
|
@@ -572,11 +479,6 @@ class QwenPipelineEngine:
|
|
| 572 |
def ensure_tts_loaded(self) -> None:
|
| 573 |
if self.tts_loaded:
|
| 574 |
return
|
| 575 |
-
if self.tts_backend == "gpt_sovits_local":
|
| 576 |
-
if not GPT_SOVITS_BASE_URL:
|
| 577 |
-
raise RuntimeError("Missing GPT_SOVITS_BASE_URL for local GPT-SoVITS TTS.")
|
| 578 |
-
self.tts_loaded = True
|
| 579 |
-
return
|
| 580 |
if self.mock_mode:
|
| 581 |
self.tts_loaded = True
|
| 582 |
return
|
|
@@ -585,196 +487,6 @@ class QwenPipelineEngine:
|
|
| 585 |
raise RuntimeError("Missing API_KEY for TTS API calls.")
|
| 586 |
self.tts_loaded = True
|
| 587 |
|
| 588 |
-
def _gptsovits_endpoint_url(self, endpoint: str) -> str:
|
| 589 |
-
endpoint = endpoint.strip()
|
| 590 |
-
if endpoint.startswith("http://") or endpoint.startswith("https://"):
|
| 591 |
-
return endpoint
|
| 592 |
-
if not endpoint.startswith("/"):
|
| 593 |
-
endpoint = "/" + endpoint
|
| 594 |
-
return f"{GPT_SOVITS_BASE_URL}{endpoint}"
|
| 595 |
-
|
| 596 |
-
def _find_gptsovits_role_entry(
|
| 597 |
-
self,
|
| 598 |
-
character_id: Optional[str],
|
| 599 |
-
character_cfg: Optional[Dict[str, Any]],
|
| 600 |
-
) -> Dict[str, Any]:
|
| 601 |
-
aliases = build_role_aliases(character_id, character_cfg)
|
| 602 |
-
|
| 603 |
-
for key in aliases:
|
| 604 |
-
entry = GPT_SOVITS_ROLE_MODEL_MAP.get(key)
|
| 605 |
-
if entry is None:
|
| 606 |
-
continue
|
| 607 |
-
if isinstance(entry, str):
|
| 608 |
-
return {"sovits_path": entry}
|
| 609 |
-
if isinstance(entry, dict):
|
| 610 |
-
return dict(entry)
|
| 611 |
-
|
| 612 |
-
norm_map: Dict[str, Any] = {}
|
| 613 |
-
for k, v in GPT_SOVITS_ROLE_MODEL_MAP.items():
|
| 614 |
-
nk = normalize_role_key(k)
|
| 615 |
-
if nk and nk not in norm_map:
|
| 616 |
-
norm_map[nk] = v
|
| 617 |
-
for key in aliases:
|
| 618 |
-
entry = norm_map.get(normalize_role_key(key))
|
| 619 |
-
if entry is None:
|
| 620 |
-
continue
|
| 621 |
-
if isinstance(entry, str):
|
| 622 |
-
return {"sovits_path": entry}
|
| 623 |
-
if isinstance(entry, dict):
|
| 624 |
-
return dict(entry)
|
| 625 |
-
|
| 626 |
-
return {}
|
| 627 |
-
|
| 628 |
-
def _guess_sovits_path_from_audio_dir(self, aliases: List[str]) -> str:
|
| 629 |
-
audio_dir = APP_DIR / "audio"
|
| 630 |
-
if not audio_dir.exists():
|
| 631 |
-
return ""
|
| 632 |
-
pth_files = [p for p in audio_dir.iterdir() if p.is_file() and p.suffix.lower() == ".pth"]
|
| 633 |
-
if not pth_files:
|
| 634 |
-
return ""
|
| 635 |
-
|
| 636 |
-
alias_set = {normalize_role_key(a) for a in aliases if a}
|
| 637 |
-
for p in pth_files:
|
| 638 |
-
stem_norm = normalize_role_key(p.stem)
|
| 639 |
-
if stem_norm and stem_norm in alias_set:
|
| 640 |
-
return str(p.resolve())
|
| 641 |
-
for p in pth_files:
|
| 642 |
-
name_norm = normalize_role_key(p.name)
|
| 643 |
-
if name_norm and any(a and a in name_norm for a in alias_set):
|
| 644 |
-
return str(p.resolve())
|
| 645 |
-
return ""
|
| 646 |
-
|
| 647 |
-
def _guess_role_ref_audio_path(self, aliases: List[str]) -> str:
|
| 648 |
-
audio_dir = APP_DIR / "audio"
|
| 649 |
-
if not audio_dir.exists():
|
| 650 |
-
return ""
|
| 651 |
-
candidates = [p for p in audio_dir.iterdir() if p.is_file() and p.suffix.lower() in {".wav", ".mp3", ".flac", ".m4a"}]
|
| 652 |
-
alias_set = {normalize_role_key(a) for a in aliases if a}
|
| 653 |
-
for p in candidates:
|
| 654 |
-
stem_norm = normalize_role_key(p.stem)
|
| 655 |
-
if stem_norm and stem_norm in alias_set:
|
| 656 |
-
return str(p.resolve())
|
| 657 |
-
return ""
|
| 658 |
-
|
| 659 |
-
def _guess_role_prompt_text(self, aliases: List[str]) -> str:
|
| 660 |
-
audio_dir = APP_DIR / "audio"
|
| 661 |
-
if not audio_dir.exists():
|
| 662 |
-
return ""
|
| 663 |
-
alias_set = {normalize_role_key(a) for a in aliases if a}
|
| 664 |
-
for p in audio_dir.iterdir():
|
| 665 |
-
if not p.is_file() or p.suffix.lower() != ".txt":
|
| 666 |
-
continue
|
| 667 |
-
stem_norm = normalize_role_key(p.stem)
|
| 668 |
-
if stem_norm and stem_norm in alias_set:
|
| 669 |
-
try:
|
| 670 |
-
return p.read_text(encoding="utf-8").strip()
|
| 671 |
-
except Exception:
|
| 672 |
-
return ""
|
| 673 |
-
return ""
|
| 674 |
-
|
| 675 |
-
def _gptsovits_role_tts_config(
|
| 676 |
-
self,
|
| 677 |
-
character_id: Optional[str],
|
| 678 |
-
character_cfg: Optional[Dict[str, Any]],
|
| 679 |
-
) -> Dict[str, str]:
|
| 680 |
-
aliases = build_role_aliases(character_id, character_cfg)
|
| 681 |
-
entry = self._find_gptsovits_role_entry(character_id, character_cfg)
|
| 682 |
-
|
| 683 |
-
cfg = character_cfg or {}
|
| 684 |
-
sovits_path = resolve_local_path_maybe(
|
| 685 |
-
str(entry.get("sovits_path") or entry.get("model_path") or cfg.get("gpt_sovits_sovits_path") or "")
|
| 686 |
-
)
|
| 687 |
-
if not file_exists(sovits_path):
|
| 688 |
-
guessed = self._guess_sovits_path_from_audio_dir(aliases)
|
| 689 |
-
if guessed:
|
| 690 |
-
sovits_path = guessed
|
| 691 |
-
if not file_exists(sovits_path):
|
| 692 |
-
sovits_path = resolve_local_path_maybe(GPT_SOVITS_DEFAULT_SOVITS_PATH)
|
| 693 |
-
|
| 694 |
-
gpt_path = resolve_local_path_maybe(
|
| 695 |
-
str(entry.get("gpt_path") or cfg.get("gpt_sovits_gpt_path") or GPT_SOVITS_DEFAULT_GPT_PATH or "")
|
| 696 |
-
)
|
| 697 |
-
if gpt_path and not file_exists(gpt_path):
|
| 698 |
-
gpt_path = ""
|
| 699 |
-
|
| 700 |
-
ref_audio_path = resolve_local_path_maybe(
|
| 701 |
-
str(entry.get("ref_audio_path") or cfg.get("gpt_sovits_ref_audio_path") or GPT_SOVITS_DEFAULT_REF_AUDIO_PATH or "")
|
| 702 |
-
)
|
| 703 |
-
if ref_audio_path and not file_exists(ref_audio_path):
|
| 704 |
-
ref_audio_path = ""
|
| 705 |
-
if not ref_audio_path:
|
| 706 |
-
guessed_ref = self._guess_role_ref_audio_path(aliases)
|
| 707 |
-
if guessed_ref:
|
| 708 |
-
ref_audio_path = guessed_ref
|
| 709 |
-
|
| 710 |
-
prompt_text = str(
|
| 711 |
-
entry.get("prompt_text") or cfg.get("gpt_sovits_prompt_text") or GPT_SOVITS_DEFAULT_PROMPT_TEXT or ""
|
| 712 |
-
).strip()
|
| 713 |
-
if not prompt_text:
|
| 714 |
-
prompt_text = self._guess_role_prompt_text(aliases)
|
| 715 |
-
prompt_lang = str(
|
| 716 |
-
entry.get("prompt_lang") or cfg.get("gpt_sovits_prompt_lang") or GPT_SOVITS_DEFAULT_PROMPT_LANG or "zh"
|
| 717 |
-
).strip() or "zh"
|
| 718 |
-
text_lang = str(
|
| 719 |
-
entry.get("text_lang") or cfg.get("gpt_sovits_text_lang") or GPT_SOVITS_DEFAULT_TEXT_LANG or "zh"
|
| 720 |
-
).strip() or "zh"
|
| 721 |
-
|
| 722 |
-
return {
|
| 723 |
-
"sovits_path": sovits_path,
|
| 724 |
-
"gpt_path": gpt_path,
|
| 725 |
-
"ref_audio_path": ref_audio_path,
|
| 726 |
-
"prompt_text": prompt_text,
|
| 727 |
-
"prompt_lang": prompt_lang,
|
| 728 |
-
"text_lang": text_lang,
|
| 729 |
-
}
|
| 730 |
-
|
| 731 |
-
def _gptsovits_set_weights(self, endpoint: str, weights_path: str) -> None:
|
| 732 |
-
if not weights_path:
|
| 733 |
-
return
|
| 734 |
-
url = self._gptsovits_endpoint_url(endpoint)
|
| 735 |
-
attempts = [
|
| 736 |
-
("POST", {"weights_path": weights_path}),
|
| 737 |
-
("POST", {"path": weights_path}),
|
| 738 |
-
("GET", {"weights_path": weights_path}),
|
| 739 |
-
("GET", {"path": weights_path}),
|
| 740 |
-
]
|
| 741 |
-
last_err = ""
|
| 742 |
-
for method, payload in attempts:
|
| 743 |
-
try:
|
| 744 |
-
if method == "POST":
|
| 745 |
-
resp = requests.post(url, json=payload, timeout=API_TIMEOUT_SEC)
|
| 746 |
-
else:
|
| 747 |
-
resp = requests.get(url, params=payload, timeout=API_TIMEOUT_SEC)
|
| 748 |
-
if resp.status_code < 400:
|
| 749 |
-
return
|
| 750 |
-
last_err = f"{resp.status_code}: {resp.text[:400]}"
|
| 751 |
-
except requests.RequestException as exc:
|
| 752 |
-
last_err = f"{type(exc).__name__}: {exc}"
|
| 753 |
-
raise RuntimeError(f"Failed to load GPT-SoVITS weights via {url}. Last error: {last_err}")
|
| 754 |
-
|
| 755 |
-
def _gptsovits_ensure_role_model(
|
| 756 |
-
self,
|
| 757 |
-
character_id: Optional[str],
|
| 758 |
-
character_cfg: Optional[Dict[str, Any]],
|
| 759 |
-
) -> Dict[str, str]:
|
| 760 |
-
cfg = self._gptsovits_role_tts_config(character_id, character_cfg)
|
| 761 |
-
sovits_path = cfg.get("sovits_path", "")
|
| 762 |
-
if not sovits_path:
|
| 763 |
-
raise RuntimeError(
|
| 764 |
-
"No SoVITS weight found. Put role-specific .pth in ./audio/ or set GPT_SOVITS_DEFAULT_SOVITS_PATH."
|
| 765 |
-
)
|
| 766 |
-
if not file_exists(sovits_path):
|
| 767 |
-
raise RuntimeError(f"SoVITS weight file not found: {sovits_path}")
|
| 768 |
-
if self._loaded_sovits_weights != sovits_path:
|
| 769 |
-
self._gptsovits_set_weights(GPT_SOVITS_SET_SOVITS_ENDPOINT, sovits_path)
|
| 770 |
-
self._loaded_sovits_weights = sovits_path
|
| 771 |
-
|
| 772 |
-
gpt_path = cfg.get("gpt_path", "")
|
| 773 |
-
if gpt_path and self._loaded_gpt_weights != gpt_path:
|
| 774 |
-
self._gptsovits_set_weights(GPT_SOVITS_SET_GPT_ENDPOINT, gpt_path)
|
| 775 |
-
self._loaded_gpt_weights = gpt_path
|
| 776 |
-
return cfg
|
| 777 |
-
|
| 778 |
def _mock_generate_lecture(self, pdf_excerpt: str) -> str:
|
| 779 |
excerpt = re.sub(r"\s+", " ", pdf_excerpt).strip()
|
| 780 |
excerpt = excerpt[:1000]
|
|
@@ -958,132 +670,6 @@ class QwenPipelineEngine:
|
|
| 958 |
chunk_paths.append(self._real_tts_single(chunk, chunk_path))
|
| 959 |
return concat_wav_files(chunk_paths, out_path)
|
| 960 |
|
| 961 |
-
def _maybe_transcode_to_wav(self, audio_bytes: bytes, out_path: str) -> str:
|
| 962 |
-
if not audio_bytes:
|
| 963 |
-
raise RuntimeError("Empty audio payload from GPT-SoVITS.")
|
| 964 |
-
if audio_bytes[:4] == b"RIFF":
|
| 965 |
-
return _save_binary_audio(audio_bytes, out_path)
|
| 966 |
-
if sf is None:
|
| 967 |
-
return _save_binary_audio(audio_bytes, out_path)
|
| 968 |
-
try:
|
| 969 |
-
data, sr = sf.read(io.BytesIO(audio_bytes))
|
| 970 |
-
sf.write(out_path, data, sr, format="WAV")
|
| 971 |
-
return out_path
|
| 972 |
-
except Exception:
|
| 973 |
-
return _save_binary_audio(audio_bytes, out_path)
|
| 974 |
-
|
| 975 |
-
def _extract_audio_bytes_from_json(self, data: Dict[str, Any]) -> bytes:
|
| 976 |
-
candidates = [
|
| 977 |
-
data.get("audio"),
|
| 978 |
-
data.get("audio_base64"),
|
| 979 |
-
data.get("audioData"),
|
| 980 |
-
(data.get("data") or {}).get("audio") if isinstance(data.get("data"), dict) else None,
|
| 981 |
-
(data.get("output") or {}).get("audio") if isinstance(data.get("output"), dict) else None,
|
| 982 |
-
]
|
| 983 |
-
for item in candidates:
|
| 984 |
-
if isinstance(item, str) and item.strip():
|
| 985 |
-
s = item.strip()
|
| 986 |
-
if s.startswith("data:"):
|
| 987 |
-
_, _, s = s.partition(",")
|
| 988 |
-
try:
|
| 989 |
-
return base64.b64decode(s)
|
| 990 |
-
except Exception:
|
| 991 |
-
continue
|
| 992 |
-
url_candidates = [
|
| 993 |
-
data.get("audio_url"),
|
| 994 |
-
data.get("url"),
|
| 995 |
-
(data.get("output") or {}).get("audio_url") if isinstance(data.get("output"), dict) else None,
|
| 996 |
-
]
|
| 997 |
-
for u in url_candidates:
|
| 998 |
-
if isinstance(u, str) and u.strip():
|
| 999 |
-
resp = requests.get(u.strip(), timeout=API_TIMEOUT_SEC)
|
| 1000 |
-
if resp.status_code >= 400:
|
| 1001 |
-
raise RuntimeError(f"Failed downloading GPT-SoVITS audio URL {resp.status_code}: {resp.text[:300]}")
|
| 1002 |
-
return resp.content
|
| 1003 |
-
raise RuntimeError(f"GPT-SoVITS JSON response did not contain audio payload: {str(data)[:500]}")
|
| 1004 |
-
|
| 1005 |
-
def _gptsovits_tts_single(
|
| 1006 |
-
self,
|
| 1007 |
-
text: str,
|
| 1008 |
-
out_path: str,
|
| 1009 |
-
role_cfg: Dict[str, str],
|
| 1010 |
-
) -> str:
|
| 1011 |
-
if not text.strip():
|
| 1012 |
-
return write_tone_wav("empty", out_path)
|
| 1013 |
-
payload: Dict[str, Any] = {
|
| 1014 |
-
"text": text,
|
| 1015 |
-
"text_lang": role_cfg.get("text_lang") or "zh",
|
| 1016 |
-
"media_type": GPT_SOVITS_MEDIA_TYPE,
|
| 1017 |
-
"streaming_mode": GPT_SOVITS_STREAMING_MODE,
|
| 1018 |
-
}
|
| 1019 |
-
ref_audio_path = role_cfg.get("ref_audio_path", "").strip()
|
| 1020 |
-
prompt_text = role_cfg.get("prompt_text", "").strip()
|
| 1021 |
-
prompt_lang = role_cfg.get("prompt_lang", "").strip() or "zh"
|
| 1022 |
-
if ref_audio_path:
|
| 1023 |
-
payload["ref_audio_path"] = ref_audio_path
|
| 1024 |
-
if prompt_text:
|
| 1025 |
-
payload["prompt_text"] = prompt_text
|
| 1026 |
-
payload["prompt_lang"] = prompt_lang
|
| 1027 |
-
|
| 1028 |
-
url = self._gptsovits_endpoint_url(GPT_SOVITS_TTS_ENDPOINT)
|
| 1029 |
-
last_err = ""
|
| 1030 |
-
responses: List[requests.Response] = []
|
| 1031 |
-
try:
|
| 1032 |
-
responses.append(requests.post(url, json=payload, timeout=API_TIMEOUT_SEC))
|
| 1033 |
-
except requests.RequestException as exc:
|
| 1034 |
-
last_err = f"POST {type(exc).__name__}: {exc}"
|
| 1035 |
-
if not responses or responses[-1].status_code in {404, 405, 422}:
|
| 1036 |
-
try:
|
| 1037 |
-
responses.append(requests.get(url, params=payload, timeout=API_TIMEOUT_SEC))
|
| 1038 |
-
except requests.RequestException as exc:
|
| 1039 |
-
last_err = f"{last_err}; GET {type(exc).__name__}: {exc}".strip("; ")
|
| 1040 |
-
|
| 1041 |
-
for resp in responses:
|
| 1042 |
-
if resp.status_code >= 400:
|
| 1043 |
-
last_err = f"{resp.status_code}: {resp.text[:500]}"
|
| 1044 |
-
continue
|
| 1045 |
-
content_type = (resp.headers.get("content-type") or "").lower()
|
| 1046 |
-
if "application/json" in content_type:
|
| 1047 |
-
data = resp.json()
|
| 1048 |
-
audio_bytes = self._extract_audio_bytes_from_json(data)
|
| 1049 |
-
return self._maybe_transcode_to_wav(audio_bytes, out_path)
|
| 1050 |
-
return self._maybe_transcode_to_wav(resp.content, out_path)
|
| 1051 |
-
|
| 1052 |
-
missing_bits = []
|
| 1053 |
-
if not ref_audio_path:
|
| 1054 |
-
missing_bits.append("GPT_SOVITS_REF_AUDIO_PATH/ref_audio_path")
|
| 1055 |
-
if not prompt_text:
|
| 1056 |
-
missing_bits.append("GPT_SOVITS_PROMPT_TEXT/prompt_text")
|
| 1057 |
-
hint = ""
|
| 1058 |
-
if missing_bits:
|
| 1059 |
-
hint = f" (check {', '.join(missing_bits)} for your GPT-SoVITS API setup)"
|
| 1060 |
-
raise RuntimeError(f"GPT-SoVITS /tts request failed: {last_err}{hint}")
|
| 1061 |
-
|
| 1062 |
-
def _gptsovits_tts(
|
| 1063 |
-
self,
|
| 1064 |
-
text: str,
|
| 1065 |
-
out_path: str,
|
| 1066 |
-
*,
|
| 1067 |
-
character_id: Optional[str] = None,
|
| 1068 |
-
character_cfg: Optional[Dict[str, Any]] = None,
|
| 1069 |
-
) -> str:
|
| 1070 |
-
role_cfg = self._gptsovits_ensure_role_model(character_id, character_cfg)
|
| 1071 |
-
# For non-WAV outputs, avoid chunking because concatenation is WAV-only.
|
| 1072 |
-
if GPT_SOVITS_MEDIA_TYPE.lower() != "wav":
|
| 1073 |
-
return self._gptsovits_tts_single(text, out_path, role_cfg)
|
| 1074 |
-
|
| 1075 |
-
chunks = split_text_for_tts(text, max_len=220)
|
| 1076 |
-
if not chunks:
|
| 1077 |
-
return write_tone_wav("empty", out_path)
|
| 1078 |
-
if len(chunks) == 1:
|
| 1079 |
-
return self._gptsovits_tts_single(chunks[0], out_path, role_cfg)
|
| 1080 |
-
|
| 1081 |
-
chunk_paths: List[str] = []
|
| 1082 |
-
for idx, chunk in enumerate(chunks, start=1):
|
| 1083 |
-
chunk_out = str(TMP_DIR / f"gptsovits_chunk_{idx}_{uuid.uuid4().hex}.wav")
|
| 1084 |
-
chunk_paths.append(self._gptsovits_tts_single(chunk, chunk_out, role_cfg))
|
| 1085 |
-
return concat_wav_files(chunk_paths, out_path)
|
| 1086 |
-
|
| 1087 |
@spaces.GPU
|
| 1088 |
def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 1089 |
self.ensure_vl_loaded()
|
|
@@ -1198,18 +784,9 @@ class QwenPipelineEngine:
|
|
| 1198 |
return rebalance_mcq_answers([asdict(q) for q in mcqs])
|
| 1199 |
|
| 1200 |
@spaces.GPU
|
| 1201 |
-
def synthesize_tts(
|
| 1202 |
-
self,
|
| 1203 |
-
text: str,
|
| 1204 |
-
name_prefix: str = "audio",
|
| 1205 |
-
*,
|
| 1206 |
-
character_id: Optional[str] = None,
|
| 1207 |
-
character_cfg: Optional[Dict[str, Any]] = None,
|
| 1208 |
-
) -> str:
|
| 1209 |
self.ensure_tts_loaded()
|
| 1210 |
out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
|
| 1211 |
-
if self.tts_backend == "gpt_sovits_local":
|
| 1212 |
-
return self._gptsovits_tts(text, out_path, character_id=character_id, character_cfg=character_cfg)
|
| 1213 |
if self.mock_mode:
|
| 1214 |
return write_tone_wav(text, out_path)
|
| 1215 |
return self._real_tts(text, out_path)
|
|
@@ -2000,15 +1577,8 @@ def play_lecture_audio(state: Dict[str, Any]):
|
|
| 2000 |
state["status"] = "No lecture text available."
|
| 2001 |
return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
|
| 2002 |
try:
|
| 2003 |
-
character_id = str(state.get("character_id") or DEFAULT_CHARACTER_ID)
|
| 2004 |
-
character_cfg = get_character_config(character_id)
|
| 2005 |
state["status"] = "Generating lecture audio..."
|
| 2006 |
-
state["lecture_audio_path"] = engine.synthesize_tts(
|
| 2007 |
-
state["lecture_text"],
|
| 2008 |
-
name_prefix="lecture",
|
| 2009 |
-
character_id=character_id,
|
| 2010 |
-
character_cfg=character_cfg,
|
| 2011 |
-
)
|
| 2012 |
state["status"] = "Lecture audio ready."
|
| 2013 |
return state, state["status"], state["lecture_audio_path"], "Lecture audio generated."
|
| 2014 |
except Exception as exc:
|
|
@@ -2022,15 +1592,8 @@ def play_explanation_audio(state: Dict[str, Any]):
|
|
| 2022 |
state["status"] = "No explanation available for TTS."
|
| 2023 |
return state, state["status"], state.get("explanation_audio_path"), "Answer a question incorrectly first."
|
| 2024 |
try:
|
| 2025 |
-
character_id = str(state.get("exam_character_id") or state.get("character_id") or DEFAULT_CHARACTER_ID)
|
| 2026 |
-
character_cfg = get_character_config(character_id)
|
| 2027 |
state["status"] = "Generating explanation audio..."
|
| 2028 |
-
state["explanation_audio_path"] = engine.synthesize_tts(
|
| 2029 |
-
text,
|
| 2030 |
-
name_prefix="explanation",
|
| 2031 |
-
character_id=character_id,
|
| 2032 |
-
character_cfg=character_cfg,
|
| 2033 |
-
)
|
| 2034 |
state["status"] = "Explanation audio ready."
|
| 2035 |
return state, state["status"], state["explanation_audio_path"], "Explanation audio generated."
|
| 2036 |
except Exception as exc:
|
|
@@ -2045,14 +1608,16 @@ def build_css() -> str:
|
|
| 2045 |
@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Inter:wght@400;500;600;700&display=swap');
|
| 2046 |
|
| 2047 |
html, body {{
|
|
|
|
| 2048 |
min-height: 100%;
|
| 2049 |
-
height: auto;
|
| 2050 |
}}
|
| 2051 |
body {{
|
| 2052 |
background-color: #ffffff !important;
|
|
|
|
| 2053 |
font-family: "Inter", sans-serif !important;
|
| 2054 |
}}
|
| 2055 |
.app, #root, .gradio-container, .gradio-container > .main {{
|
|
|
|
| 2056 |
background: transparent !important;
|
| 2057 |
}}
|
| 2058 |
.gradio-container {{
|
|
@@ -2087,8 +1652,8 @@ body {{
|
|
| 2087 |
color: #eef1f6 !important;
|
| 2088 |
}}
|
| 2089 |
#page-shell {{
|
| 2090 |
-
min-height:
|
| 2091 |
-
padding: 2rem 1.2rem
|
| 2092 |
max-width: 980px;
|
| 2093 |
margin: 0 auto;
|
| 2094 |
}}
|
|
@@ -2354,8 +1919,12 @@ body {{
|
|
| 2354 |
margin-top: 0.25rem !important;
|
| 2355 |
}}
|
| 2356 |
#bottom-composer {{
|
| 2357 |
-
|
| 2358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2359 |
background: rgba(24, 26, 34, 0.88);
|
| 2360 |
border: 1px solid rgba(255,255,255,0.08);
|
| 2361 |
border-radius: 999px;
|
|
@@ -2474,79 +2043,7 @@ body {{
|
|
| 2474 |
border: 1px solid rgba(59, 130, 246, 0.28);
|
| 2475 |
color: rgba(255, 255, 255, 0.95);
|
| 2476 |
}}
|
| 2477 |
-
|
| 2478 |
-
position: fixed;
|
| 2479 |
-
inset: 0;
|
| 2480 |
-
z-index: 200;
|
| 2481 |
-
display: none;
|
| 2482 |
-
align-items: center;
|
| 2483 |
-
justify-content: center;
|
| 2484 |
-
background: rgba(2, 6, 23, 0.55);
|
| 2485 |
-
backdrop-filter: blur(6px);
|
| 2486 |
-
padding: 16px;
|
| 2487 |
-
}}
|
| 2488 |
-
#exam-picker-overlay:not(.hide) {{
|
| 2489 |
-
display: flex;
|
| 2490 |
-
}}
|
| 2491 |
-
#exam-picker-overlay.hide {{
|
| 2492 |
-
display: none !important;
|
| 2493 |
-
pointer-events: none !important;
|
| 2494 |
-
}}
|
| 2495 |
-
#exam-picker-modal {{
|
| 2496 |
-
width: min(720px, 94vw);
|
| 2497 |
-
border-radius: 16px;
|
| 2498 |
-
background: rgba(14, 16, 24, 0.96);
|
| 2499 |
-
border: 1px solid rgba(255, 255, 255, 0.12);
|
| 2500 |
-
box-shadow: 0 18px 50px rgba(0, 0, 0, 0.45);
|
| 2501 |
-
padding: 16px;
|
| 2502 |
-
height: auto !important;
|
| 2503 |
-
max-height: 320px;
|
| 2504 |
-
overflow: hidden;
|
| 2505 |
-
}}
|
| 2506 |
-
#exam-picker-modal .block,
|
| 2507 |
-
#exam-picker-modal .wrap,
|
| 2508 |
-
#exam-picker-modal .panel {{
|
| 2509 |
-
background: transparent !important;
|
| 2510 |
-
border: none !important;
|
| 2511 |
-
box-shadow: none !important;
|
| 2512 |
-
}}
|
| 2513 |
-
#exam-picker-title {{
|
| 2514 |
-
font-weight: 700;
|
| 2515 |
-
color: #f4f6fb;
|
| 2516 |
-
margin-bottom: 10px;
|
| 2517 |
-
}}
|
| 2518 |
-
.exam-picker-grid {{
|
| 2519 |
-
display: flex !important;
|
| 2520 |
-
flex-wrap: nowrap;
|
| 2521 |
-
gap: 12px;
|
| 2522 |
-
}}
|
| 2523 |
-
.exam-picker-card {{
|
| 2524 |
-
flex: 1 1 0;
|
| 2525 |
-
min-width: 0 !important;
|
| 2526 |
-
border-radius: 14px;
|
| 2527 |
-
border: 1px solid rgba(255, 255, 255, 0.14);
|
| 2528 |
-
background: rgba(255, 255, 255, 0.06);
|
| 2529 |
-
padding: 12px;
|
| 2530 |
-
overflow: hidden;
|
| 2531 |
-
transition: transform 120ms ease, border-color 120ms ease, box-shadow 120ms ease;
|
| 2532 |
-
}}
|
| 2533 |
-
.exam-picker-card:hover {{
|
| 2534 |
-
transform: translateY(-2px);
|
| 2535 |
-
border-color: rgba(59, 130, 246, 0.42);
|
| 2536 |
-
box-shadow: 0 10px 24px rgba(0, 0, 0, 0.35);
|
| 2537 |
-
}}
|
| 2538 |
-
.exam-picker-avatar {{
|
| 2539 |
-
width: 56px;
|
| 2540 |
-
height: 56px;
|
| 2541 |
-
border-radius: 999px;
|
| 2542 |
-
object-fit: cover;
|
| 2543 |
-
display: block;
|
| 2544 |
-
margin: 0 auto 10px auto;
|
| 2545 |
-
}}
|
| 2546 |
-
.exam-picker-card button {{
|
| 2547 |
-
width: 100%;
|
| 2548 |
-
}}
|
| 2549 |
-
@media (prefers-color-scheme: light) and (prefers-color-scheme: dark) {{
|
| 2550 |
body {{
|
| 2551 |
background: linear-gradient(180deg, #f5f7fb 0%, #eef2f8 100%) !important;
|
| 2552 |
}}
|
|
@@ -2935,18 +2432,6 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2935 |
container=False,
|
| 2936 |
)
|
| 2937 |
|
| 2938 |
-
with gr.Row(elem_id="bottom-composer"):
|
| 2939 |
-
pdf_input = gr.File(
|
| 2940 |
-
label="",
|
| 2941 |
-
show_label=False,
|
| 2942 |
-
file_types=[".pdf"],
|
| 2943 |
-
type="filepath",
|
| 2944 |
-
elem_id="pdf-uploader",
|
| 2945 |
-
scale=7,
|
| 2946 |
-
min_width=0,
|
| 2947 |
-
)
|
| 2948 |
-
run_btn = gr.Button("Generate", variant="primary", elem_id="generate-btn", scale=3, min_width=120)
|
| 2949 |
-
|
| 2950 |
state = gr.State(new_session_state())
|
| 2951 |
|
| 2952 |
loading_md = gr.HTML("", elem_id="gen-loading", visible=False)
|
|
@@ -3055,6 +2540,18 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 3055 |
score_box = gr.Textbox(label="Score", value="Score: 0 / 0", interactive=False, visible=False)
|
| 3056 |
feedback_box = gr.Textbox(label="Feedback / Explanation", lines=8, interactive=False, visible=False)
|
| 3057 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3058 |
outputs = [
|
| 3059 |
state,
|
| 3060 |
character_header_html,
|
|
@@ -3102,4 +2599,9 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 3102 |
demo.queue()
|
| 3103 |
|
| 3104 |
if __name__ == "__main__":
|
| 3105 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import base64
|
| 2 |
import html
|
|
|
|
| 3 |
import json
|
| 4 |
import math
|
| 5 |
import os
|
|
|
|
| 34 |
except Exception: # pragma: no cover
|
| 35 |
pdfium = None # type: ignore
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
APP_DIR = Path(__file__).parent.resolve()
|
| 39 |
TMP_DIR = APP_DIR / "tmp_outputs"
|
|
|
|
| 62 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 63 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
| 64 |
TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
|
|
|
|
| 65 |
API_TIMEOUT_SEC = int(os.getenv("API_TIMEOUT_SEC", "180"))
|
| 66 |
QWEN_VL_MAX_PAGES = int(os.getenv("QWEN_VL_MAX_PAGES", "4"))
|
| 67 |
QWEN_VL_RENDER_SCALE = float(os.getenv("QWEN_VL_RENDER_SCALE", "1.5"))
|
| 68 |
QWEN_VL_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MAX_NEW_TOKENS", "800"))
|
| 69 |
QWEN_VL_MCQ_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MCQ_MAX_NEW_TOKENS", "1800"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
DEFAULT_LECTURE_PROMPT_TEMPLATE = """
|
|
|
|
| 185 |
d / str(meta.get("mcq_retry_prompt_file", "mcq_retry_prompt.txt")),
|
| 186 |
DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
|
| 187 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
configs[cid] = config
|
| 190 |
|
|
|
|
| 201 |
"lecture_prompt_template": DEFAULT_LECTURE_PROMPT_TEMPLATE,
|
| 202 |
"mcq_prompt_template": DEFAULT_MCQ_PROMPT_TEMPLATE,
|
| 203 |
"mcq_retry_prompt_template": DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
}
|
| 205 |
return configs
|
| 206 |
|
|
|
|
| 215 |
return CHARACTER_CONFIGS[DEFAULT_CHARACTER_ID]
|
| 216 |
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
@dataclass
|
| 219 |
class MCQItem:
|
| 220 |
question: str
|
|
|
|
| 461 |
|
| 462 |
def __init__(self) -> None:
    """Set up lazy-loading flags and the per-PDF page cache.

    No models or API clients are touched here; real initialization is
    deferred to the ``ensure_*_loaded`` methods.
    """
    # Mock mode short-circuits real model/API usage.
    self.mock_mode = USE_MOCK_MODELS
    # Lazy-init flags flipped by ensure_vl_loaded() / ensure_tts_loaded().
    self.vl_loaded = self.tts_loaded = False
    # PDF path -> list of strings (presumably rendered page image paths —
    # TODO confirm against the PDF rendering helper).
    self._pdf_page_cache: Dict[str, List[str]] = {}
|
|
|
|
|
|
|
| 467 |
|
| 468 |
def ensure_vl_loaded(self) -> None:
|
| 469 |
if self.vl_loaded:
|
|
|
|
| 479 |
def ensure_tts_loaded(self) -> None:
|
| 480 |
if self.tts_loaded:
|
| 481 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
if self.mock_mode:
|
| 483 |
self.tts_loaded = True
|
| 484 |
return
|
|
|
|
| 487 |
raise RuntimeError("Missing API_KEY for TTS API calls.")
|
| 488 |
self.tts_loaded = True
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
def _mock_generate_lecture(self, pdf_excerpt: str) -> str:
|
| 491 |
excerpt = re.sub(r"\s+", " ", pdf_excerpt).strip()
|
| 492 |
excerpt = excerpt[:1000]
|
|
|
|
| 670 |
chunk_paths.append(self._real_tts_single(chunk, chunk_path))
|
| 671 |
return concat_wav_files(chunk_paths, out_path)
|
| 672 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
@spaces.GPU
|
| 674 |
def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 675 |
self.ensure_vl_loaded()
|
|
|
|
| 784 |
return rebalance_mcq_answers([asdict(q) for q in mcqs])
|
| 785 |
|
| 786 |
@spaces.GPU
def synthesize_tts(self, text: str, name_prefix: str = "audio") -> str:
    """Synthesize speech for *text* and return the path to the resulting WAV.

    Lazily initializes the TTS backend, then writes a uniquely named file
    under TMP_DIR. In mock mode a placeholder tone is produced instead of
    calling the real synthesis backend.
    """
    self.ensure_tts_loaded()
    out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
    # Pick the synthesis routine once, then call it with identical arguments.
    synthesize = write_tone_wav if self.mock_mode else self._real_tts
    return synthesize(text, out_path)
|
|
|
|
| 1577 |
state["status"] = "No lecture text available."
|
| 1578 |
return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
|
| 1579 |
try:
|
|
|
|
|
|
|
| 1580 |
state["status"] = "Generating lecture audio..."
|
| 1581 |
+
state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1582 |
state["status"] = "Lecture audio ready."
|
| 1583 |
return state, state["status"], state["lecture_audio_path"], "Lecture audio generated."
|
| 1584 |
except Exception as exc:
|
|
|
|
| 1592 |
state["status"] = "No explanation available for TTS."
|
| 1593 |
return state, state["status"], state.get("explanation_audio_path"), "Answer a question incorrectly first."
|
| 1594 |
try:
|
|
|
|
|
|
|
| 1595 |
state["status"] = "Generating explanation audio..."
|
| 1596 |
+
state["explanation_audio_path"] = engine.synthesize_tts(text, name_prefix="explanation")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1597 |
state["status"] = "Explanation audio ready."
|
| 1598 |
return state, state["status"], state["explanation_audio_path"], "Explanation audio generated."
|
| 1599 |
except Exception as exc:
|
|
|
|
| 1608 |
@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Inter:wght@400;500;600;700&display=swap');
|
| 1609 |
|
| 1610 |
html, body {{
|
| 1611 |
+
height: 100%;
|
| 1612 |
min-height: 100%;
|
|
|
|
| 1613 |
}}
|
| 1614 |
body {{
|
| 1615 |
background-color: #ffffff !important;
|
| 1616 |
+
color: #0f172a !important;
|
| 1617 |
font-family: "Inter", sans-serif !important;
|
| 1618 |
}}
|
| 1619 |
.app, #root, .gradio-container, .gradio-container > .main {{
|
| 1620 |
+
min-height: 100%;
|
| 1621 |
background: transparent !important;
|
| 1622 |
}}
|
| 1623 |
.gradio-container {{
|
|
|
|
| 1652 |
color: #eef1f6 !important;
|
| 1653 |
}}
|
| 1654 |
#page-shell {{
|
| 1655 |
+
min-height: 100%;
|
| 1656 |
+
padding: 2rem 1.2rem 9rem 1.2rem;
|
| 1657 |
max-width: 980px;
|
| 1658 |
margin: 0 auto;
|
| 1659 |
}}
|
|
|
|
| 1919 |
margin-top: 0.25rem !important;
|
| 1920 |
}}
|
| 1921 |
#bottom-composer {{
|
| 1922 |
+
position: fixed;
|
| 1923 |
+
left: 50%;
|
| 1924 |
+
transform: translateX(-50%);
|
| 1925 |
+
bottom: 18px;
|
| 1926 |
+
width: min(860px, calc(100vw - 28px));
|
| 1927 |
+
z-index: 40;
|
| 1928 |
background: rgba(24, 26, 34, 0.88);
|
| 1929 |
border: 1px solid rgba(255,255,255,0.08);
|
| 1930 |
border-radius: 999px;
|
|
|
|
| 2043 |
border: 1px solid rgba(59, 130, 246, 0.28);
|
| 2044 |
color: rgba(255, 255, 255, 0.95);
|
| 2045 |
}}
|
| 2046 |
+
@media (prefers-color-scheme: light) {{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2047 |
body {{
|
| 2048 |
background: linear-gradient(180deg, #f5f7fb 0%, #eef2f8 100%) !important;
|
| 2049 |
}}
|
|
|
|
| 2432 |
container=False,
|
| 2433 |
)
|
| 2434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2435 |
state = gr.State(new_session_state())
|
| 2436 |
|
| 2437 |
loading_md = gr.HTML("", elem_id="gen-loading", visible=False)
|
|
|
|
| 2540 |
score_box = gr.Textbox(label="Score", value="Score: 0 / 0", interactive=False, visible=False)
|
| 2541 |
feedback_box = gr.Textbox(label="Feedback / Explanation", lines=8, interactive=False, visible=False)
|
| 2542 |
|
| 2543 |
# Fixed bottom "composer" bar (styled by the #bottom-composer CSS rules):
# a PDF picker and the Generate button laid out side by side.
with gr.Row(elem_id="bottom-composer"):
    pdf_input = gr.File(
        label="",
        show_label=False,  # keep the pill-shaped bar compact
        file_types=[".pdf"],  # only PDFs are accepted
        type="filepath",  # event handlers receive a filesystem path string
        elem_id="pdf-uploader",
        scale=7,  # uploader takes the bulk of the row width
        min_width=0,
    )
    run_btn = gr.Button("Generate", variant="primary", elem_id="generate-btn", scale=3, min_width=120)
|
| 2554 |
+
|
| 2555 |
outputs = [
|
| 2556 |
state,
|
| 2557 |
character_header_html,
|
|
|
|
| 2599 |
# Enable Gradio's request queue so long-running generation jobs are serialized.
demo.queue()

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required for containerized Spaces)
        server_port=7860,  # conventional Hugging Face Spaces port
        # NOTE(review): `css` is usually passed to gr.Blocks(...), not launch();
        # confirm the installed Gradio version accepts it here.
        css=CSS,
        ssr_mode=False,  # disable server-side rendering
    )
|