Fix generation bug: correct positional arg mapping for Quick Generate, main Generate, Save/Load. Add *args unpacking for the 80 character-voice inputs (8 characters x 10 fields).
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ file upload, chapter selection, segment previews, and project save/load.
|
|
| 6 |
|
| 7 |
import os
|
| 8 |
import json
|
| 9 |
-
import base64
|
| 10 |
from pathlib import Path
|
| 11 |
from typing import Dict, List, Optional
|
| 12 |
|
|
@@ -147,24 +146,20 @@ input:focus, textarea:focus, select:focus {
|
|
| 147 |
border-color: #334155 !important;
|
| 148 |
}
|
| 149 |
|
| 150 |
-
/* Fix checkbox and label contrast */
|
| 151 |
input[type="checkbox"] + label,
|
| 152 |
.checkbox-label,
|
| 153 |
.gr-checkbox label {
|
| 154 |
color: #f8fafc !important;
|
| 155 |
}
|
| 156 |
|
| 157 |
-
/* Fix list text in tips */
|
| 158 |
li, .prose li, .gr-prose li {
|
| 159 |
color: #cbd5e1 !important;
|
| 160 |
}
|
| 161 |
|
| 162 |
-
/* Ensure strong/bold text is visible */
|
| 163 |
strong, b {
|
| 164 |
color: #f8fafc !important;
|
| 165 |
}
|
| 166 |
|
| 167 |
-
/* Code inline styling */
|
| 168 |
code {
|
| 169 |
background: #334155 !important;
|
| 170 |
color: #22d3ee !important;
|
|
@@ -172,7 +167,6 @@ code {
|
|
| 172 |
border-radius: 4px !important;
|
| 173 |
}
|
| 174 |
|
| 175 |
-
/* Progress bar styling */
|
| 176 |
progress {
|
| 177 |
width: 100%;
|
| 178 |
height: 8px;
|
|
@@ -188,7 +182,6 @@ progress::-webkit-progress-value {
|
|
| 188 |
border-radius: 4px;
|
| 189 |
}
|
| 190 |
|
| 191 |
-
/* Segment list styling */
|
| 192 |
.seg-item {
|
| 193 |
background: #0f172a;
|
| 194 |
border: 1px solid #334155;
|
|
@@ -214,10 +207,6 @@ progress::-webkit-progress-value {
|
|
| 214 |
# ---------------------------------------------------------------------------
|
| 215 |
|
| 216 |
_pipeline: Optional[AudiobookPipeline] = None
|
| 217 |
-
_stored_text: str = ""
|
| 218 |
-
_stored_chapters: List[Dict] = []
|
| 219 |
-
_stored_segments_meta: List[Dict] = []
|
| 220 |
-
_stored_segment_paths: List[str] = []
|
| 221 |
|
| 222 |
|
| 223 |
def get_pipeline() -> AudiobookPipeline:
|
|
@@ -258,15 +247,13 @@ def handle_upload(file_obj) -> tuple:
|
|
| 258 |
pipe = get_pipeline()
|
| 259 |
text, fname = pipe.parse_upload(file_obj)
|
| 260 |
text = pipe.processor.clean_text(text)
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
if len(_stored_chapters) > 5:
|
| 266 |
-
ch_info += f" (+{len(_stored_chapters)-5} more)"
|
| 267 |
wc = len(text.split())
|
| 268 |
dur = estimate_duration(wc)
|
| 269 |
-
return text, f"Loaded {fname} — {wc} words (~{dur}) |
|
| 270 |
except Exception as e:
|
| 271 |
return "", f"Error: {e}"
|
| 272 |
|
|
@@ -280,11 +267,15 @@ def extract_chars(text: str, use_ai: bool) -> tuple:
|
|
| 280 |
return chars, status
|
| 281 |
|
| 282 |
|
| 283 |
-
def get_chapter_text(text: str,
|
| 284 |
-
if not text:
|
| 285 |
-
return
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
# ---------------------------------------------------------------------------
|
|
@@ -295,11 +286,22 @@ def get_chapter_text(text: str, chapter_idx: int) -> str:
|
|
| 295 |
def generate_audiobook_gpu(
|
| 296 |
text,
|
| 297 |
nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 298 |
-
gen_temp, gen_seed, output_fmt,
|
| 299 |
-
names, descs, modes, presets, audios, ref_texts, designs, instructs, langs, speeds,
|
| 300 |
):
|
| 301 |
if not text or len(text.strip()) < 50:
|
| 302 |
-
return None,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
pipe = get_pipeline()
|
| 305 |
|
|
@@ -312,7 +314,7 @@ def generate_audiobook_gpu(
|
|
| 312 |
design_desc=nar_design if nar_mode == "design" else None,
|
| 313 |
instruct=nar_instruct,
|
| 314 |
language=nar_lang,
|
| 315 |
-
speed=float(nar_speed),
|
| 316 |
)
|
| 317 |
|
| 318 |
char_configs = {}
|
|
@@ -348,11 +350,7 @@ def generate_audiobook_gpu(
|
|
| 348 |
temperature=gen_temp,
|
| 349 |
seed=int(gen_seed),
|
| 350 |
)
|
| 351 |
-
global _stored_segment_paths, _stored_segments_meta
|
| 352 |
-
_stored_segment_paths = seg_paths
|
| 353 |
-
_stored_segments_meta = seg_meta
|
| 354 |
|
| 355 |
-
# Build segment list HTML
|
| 356 |
seg_html = "<div style='max-height: 300px; overflow-y: auto;'>"
|
| 357 |
for s in seg_meta[:50]:
|
| 358 |
tclass = "narration" if s['type'] == 'narration' else "dialogue"
|
|
@@ -361,7 +359,6 @@ def generate_audiobook_gpu(
|
|
| 361 |
seg_html += f"<div style='text-align:center;color:#94a3b8;padding:0.5rem;'>... and {len(seg_meta)-50} more segments</div>"
|
| 362 |
seg_html += "</div>"
|
| 363 |
|
| 364 |
-
# Extra export
|
| 365 |
extra_path = None
|
| 366 |
if output_fmt == "wav":
|
| 367 |
extra_path = output_path.replace(".mp3", ".wav")
|
|
@@ -390,7 +387,7 @@ def preview_narrator_gpu(mode, preset, audio, ref_text, design, instruct, lang,
|
|
| 390 |
design_desc=design if mode == "design" else None,
|
| 391 |
instruct=instruct,
|
| 392 |
language=lang,
|
| 393 |
-
speed=float(speed),
|
| 394 |
)
|
| 395 |
try:
|
| 396 |
wav, sr = pipe.preview_voice(vc)
|
|
@@ -425,19 +422,74 @@ def preview_char_voice_gpu(name, mode, preset, audio, ref_text, design, instruct
|
|
| 425 |
return None, f"Preview failed: {e}"
|
| 426 |
|
| 427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
# ---------------------------------------------------------------------------
|
| 429 |
# Project Save/Load
|
| 430 |
# ---------------------------------------------------------------------------
|
| 431 |
|
| 432 |
-
def do_save_project(text, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 433 |
-
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
nar_cfg = VoiceConfig(
|
| 436 |
name="Narrator", mode=nar_mode, preset=nar_preset if nar_mode == "preset" else None,
|
| 437 |
ref_audio=nar_audio if nar_mode == "clone" and nar_audio else None,
|
| 438 |
ref_text=nar_ref_text if nar_mode == "clone" else None,
|
| 439 |
design_desc=nar_design if nar_mode == "design" else None,
|
| 440 |
-
instruct=nar_instruct, language=nar_lang,
|
|
|
|
| 441 |
)
|
| 442 |
char_configs = {}
|
| 443 |
for i in range(8):
|
|
@@ -463,7 +515,6 @@ def do_load_project(json_str):
|
|
| 463 |
nar = data["narrator"]
|
| 464 |
chars = data.get("characters", {})
|
| 465 |
|
| 466 |
-
# Build updates for narrator
|
| 467 |
nar_updates = [
|
| 468 |
gr.update(value=nar.mode),
|
| 469 |
gr.update(value=nar.preset if nar.preset else "Ryan", visible=nar.mode=="preset"),
|
|
@@ -475,7 +526,6 @@ def do_load_project(json_str):
|
|
| 475 |
gr.update(value=nar.speed),
|
| 476 |
]
|
| 477 |
|
| 478 |
-
# Build updates for characters (up to 8)
|
| 479 |
char_updates = []
|
| 480 |
char_items = list(chars.items())[:8]
|
| 481 |
for i in range(8):
|
|
@@ -493,6 +543,8 @@ def do_load_project(json_str):
|
|
| 493 |
gr.update(value=c.instruct, visible=True),
|
| 494 |
gr.update(value=c.language, visible=True),
|
| 495 |
gr.update(value=c.speed, visible=True),
|
|
|
|
|
|
|
| 496 |
])
|
| 497 |
else:
|
| 498 |
char_updates.extend([
|
|
@@ -507,55 +559,16 @@ def do_load_project(json_str):
|
|
| 507 |
gr.update(visible=False),
|
| 508 |
gr.update(visible=False),
|
| 509 |
gr.update(visible=False),
|
|
|
|
|
|
|
| 510 |
])
|
| 511 |
|
| 512 |
text_sample = data.get("text_sample", "")
|
| 513 |
return [text_sample] + nar_updates + char_updates + [f"Project loaded! {len(chars)} characters configured."]
|
| 514 |
-
except Exception as e:
|
| 515 |
-
return [""] + [gr.update()]*43 + [f"Error loading project: {e}"]
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
# ---------------------------------------------------------------------------
|
| 519 |
-
# Quick Generate
|
| 520 |
-
# ---------------------------------------------------------------------------
|
| 521 |
-
|
| 522 |
-
@spaces.GPU(duration=180)
|
| 523 |
-
def quick_generate_gpu(text, narrator_preset, gen_temp, gen_seed, output_fmt):
|
| 524 |
-
"""One-click generation with all defaults."""
|
| 525 |
-
if not text or len(text.strip()) < 50:
|
| 526 |
-
return None, "Error: Text too short."
|
| 527 |
-
|
| 528 |
-
pipe = get_pipeline()
|
| 529 |
-
nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
|
| 530 |
-
language="English", speed=1.0)
|
| 531 |
-
|
| 532 |
-
def prog_cb(ratio: float, msg: str):
|
| 533 |
-
print(f"[{ratio*100:.0f}%] {msg}")
|
| 534 |
-
|
| 535 |
-
try:
|
| 536 |
-
output_path, seg_paths, seg_meta = pipe.generate(
|
| 537 |
-
text=text,
|
| 538 |
-
narrator_config=nar_cfg,
|
| 539 |
-
character_configs={},
|
| 540 |
-
progress_callback=prog_cb,
|
| 541 |
-
temperature=gen_temp,
|
| 542 |
-
seed=int(gen_seed),
|
| 543 |
-
)
|
| 544 |
-
|
| 545 |
-
extra_path = None
|
| 546 |
-
if output_fmt == "wav":
|
| 547 |
-
extra_path = output_path.replace(".mp3", ".wav")
|
| 548 |
-
from backend import save_audiobook
|
| 549 |
-
save_audiobook(seg_paths, extra_path, fmt="wav")
|
| 550 |
-
elif output_fmt == "zip":
|
| 551 |
-
extra_path = pipe.export_segments_zip(seg_paths)
|
| 552 |
-
|
| 553 |
-
final_path = extra_path if extra_path else output_path
|
| 554 |
-
return final_path, f"Quick audiobook ready! {len(seg_meta)} segments."
|
| 555 |
except Exception as e:
|
| 556 |
import traceback
|
| 557 |
traceback.print_exc()
|
| 558 |
-
return
|
| 559 |
|
| 560 |
|
| 561 |
# ---------------------------------------------------------------------------
|
|
@@ -637,7 +650,7 @@ def build_app():
|
|
| 637 |
quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
|
| 638 |
quick_status = gr.Textbox(show_label=False, interactive=False)
|
| 639 |
gr.Markdown("---")
|
| 640 |
-
gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text
|
| 641 |
|
| 642 |
with gr.Row():
|
| 643 |
chapter_selector = gr.Dropdown(
|
|
@@ -665,7 +678,6 @@ def build_app():
|
|
| 665 |
outputs=[quick_audio, quick_status],
|
| 666 |
)
|
| 667 |
|
| 668 |
-
# Chapter detection
|
| 669 |
def refresh_chapters(text):
|
| 670 |
if not text:
|
| 671 |
return gr.update(choices=["All"], value="All")
|
|
@@ -703,11 +715,11 @@ def build_app():
|
|
| 703 |
|
| 704 |
with gr.Column(scale=2):
|
| 705 |
gr.Markdown("## Character Voices")
|
| 706 |
-
gr.Markdown("Configure up to 8 characters.
|
| 707 |
|
| 708 |
char_names, char_descs, char_modes, char_presets = [], [], [], []
|
| 709 |
char_audios, char_ref_texts, char_designs, char_instructs, char_langs, char_speeds = [], [], [], [], [], []
|
| 710 |
-
char_rows, char_preview_btns, char_preview_audios = [], [], []
|
| 711 |
|
| 712 |
for i in range(8):
|
| 713 |
visible_default = (i == 0)
|
|
@@ -727,12 +739,13 @@ def build_app():
|
|
| 727 |
with gr.Row():
|
| 728 |
cpv_btn = gr.Button("🔊 Preview", variant="secondary", visible=visible_default)
|
| 729 |
cpv_audio = gr.Audio(label="Preview", interactive=False, visible=visible_default)
|
|
|
|
| 730 |
|
| 731 |
cm.change(on_mode_change, inputs=cm, outputs=[cp, ca, crt, cdes])
|
| 732 |
cpv_btn.click(
|
| 733 |
preview_char_voice_gpu,
|
| 734 |
inputs=[cn, cm, cp, ca, crt, cdes, cinstr, cl, cspd],
|
| 735 |
-
outputs=[cpv_audio,
|
| 736 |
)
|
| 737 |
|
| 738 |
char_rows.append(row)
|
|
@@ -748,6 +761,7 @@ def build_app():
|
|
| 748 |
char_speeds.append(cspd)
|
| 749 |
char_preview_btns.append(cpv_btn)
|
| 750 |
char_preview_audios.append(cpv_audio)
|
|
|
|
| 751 |
|
| 752 |
# ==================== TAB 3: Generate ====================
|
| 753 |
with gr.TabItem("⚡ Generate"):
|
|
@@ -833,6 +847,7 @@ def build_app():
|
|
| 833 |
gr.update(value=chars[i].get("speed", 1.0), visible=True),
|
| 834 |
gr.update(visible=True),
|
| 835 |
gr.update(visible=True),
|
|
|
|
| 836 |
])
|
| 837 |
else:
|
| 838 |
updates.extend([
|
|
@@ -849,6 +864,7 @@ def build_app():
|
|
| 849 |
gr.update(visible=False),
|
| 850 |
gr.update(visible=False),
|
| 851 |
gr.update(visible=False),
|
|
|
|
| 852 |
])
|
| 853 |
return [status] + updates
|
| 854 |
|
|
@@ -856,7 +872,7 @@ def build_app():
|
|
| 856 |
item for sublist in [
|
| 857 |
[char_rows[i], char_names[i], char_descs[i], char_modes[i], char_presets[i],
|
| 858 |
char_audios[i], char_ref_texts[i], char_designs[i], char_instructs[i], char_langs[i],
|
| 859 |
-
char_speeds[i], char_preview_btns[i], char_preview_audios[i]]
|
| 860 |
for i in range(8)
|
| 861 |
] for item in sublist
|
| 862 |
]
|
|
@@ -868,26 +884,16 @@ def build_app():
|
|
| 868 |
char_audios + char_ref_texts + char_designs + char_instructs + char_langs + char_speeds
|
| 869 |
)
|
| 870 |
|
| 871 |
-
def get_text_for_gen(story_text, chapter_sel):
    """Return the text to generate from for the current chapter selection.

    Args:
        story_text: The full story text.
        chapter_sel: Chapter selector value. ``"All"`` or any falsy value
            means the whole story; otherwise the part before the first
            ``:`` is parsed as ``Ch<N>`` with a 1-based chapter number.

    Returns:
        The result of ``get_chapter_text(story_text, idx)`` for the parsed
        0-based index, or ``story_text`` unchanged when no specific chapter
        is selected or when parsing/lookup fails.
    """
    if chapter_sel == "All" or not chapter_sel:
        return story_text
    try:
        # Convert the 1-based chapter number in the label to a 0-based index.
        idx = int(chapter_sel.split(":")[0].replace("Ch", "")) - 1
        return get_chapter_text(story_text, idx)
    except Exception:
        # Best-effort fallback to the full text. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return story_text
|
| 880 |
-
|
| 881 |
-
def wrapped_generate(story_text, chapter_sel, *args):
    """Resolve the chapter selection to text, then run full generation.

    The first two inputs are reduced to a single text via
    ``get_text_for_gen``; every remaining positional input is forwarded
    unchanged to ``generate_audiobook_gpu``.
    """
    return generate_audiobook_gpu(get_text_for_gen(story_text, chapter_sel), *args)
|
| 884 |
-
|
| 885 |
gen_inputs = [
|
| 886 |
story_input, chapter_selector,
|
| 887 |
nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 888 |
gen_temp, gen_seed, output_fmt,
|
| 889 |
] + all_char_inputs
|
| 890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
gen_btn.click(
|
| 892 |
wrapped_generate,
|
| 893 |
inputs=gen_inputs,
|
|
@@ -895,20 +901,14 @@ def build_app():
|
|
| 895 |
)
|
| 896 |
|
| 897 |
# ---------- Project wiring ----------
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
] + all_char_inputs + [gen_temp, gen_seed],
|
| 904 |
-
outputs=[project_json],
|
| 905 |
-
)
|
| 906 |
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
inputs=[load_json],
|
| 910 |
-
outputs=[story_input, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed] + extract_outputs[1:] + [load_status],
|
| 911 |
-
)
|
| 912 |
|
| 913 |
return demo
|
| 914 |
|
|
|
|
| 6 |
|
| 7 |
import os
|
| 8 |
import json
|
|
|
|
| 9 |
from pathlib import Path
|
| 10 |
from typing import Dict, List, Optional
|
| 11 |
|
|
|
|
| 146 |
border-color: #334155 !important;
|
| 147 |
}
|
| 148 |
|
|
|
|
| 149 |
input[type="checkbox"] + label,
|
| 150 |
.checkbox-label,
|
| 151 |
.gr-checkbox label {
|
| 152 |
color: #f8fafc !important;
|
| 153 |
}
|
| 154 |
|
|
|
|
| 155 |
li, .prose li, .gr-prose li {
|
| 156 |
color: #cbd5e1 !important;
|
| 157 |
}
|
| 158 |
|
|
|
|
| 159 |
strong, b {
|
| 160 |
color: #f8fafc !important;
|
| 161 |
}
|
| 162 |
|
|
|
|
| 163 |
code {
|
| 164 |
background: #334155 !important;
|
| 165 |
color: #22d3ee !important;
|
|
|
|
| 167 |
border-radius: 4px !important;
|
| 168 |
}
|
| 169 |
|
|
|
|
| 170 |
progress {
|
| 171 |
width: 100%;
|
| 172 |
height: 8px;
|
|
|
|
| 182 |
border-radius: 4px;
|
| 183 |
}
|
| 184 |
|
|
|
|
| 185 |
.seg-item {
|
| 186 |
background: #0f172a;
|
| 187 |
border: 1px solid #334155;
|
|
|
|
| 207 |
# ---------------------------------------------------------------------------
|
| 208 |
|
| 209 |
_pipeline: Optional[AudiobookPipeline] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
|
| 212 |
def get_pipeline() -> AudiobookPipeline:
|
|
|
|
| 247 |
pipe = get_pipeline()
|
| 248 |
text, fname = pipe.parse_upload(file_obj)
|
| 249 |
text = pipe.processor.clean_text(text)
|
| 250 |
+
chs = pipe.detect_chapters(text)
|
| 251 |
+
ch_info = " | ".join([f"Ch{c['idx']+1}: {c['word_count']}w" for c in chs[:5]])
|
| 252 |
+
if len(chs) > 5:
|
| 253 |
+
ch_info += f" (+{len(chs)-5} more)"
|
|
|
|
|
|
|
| 254 |
wc = len(text.split())
|
| 255 |
dur = estimate_duration(wc)
|
| 256 |
+
return text, f"Loaded {fname} — {wc} words (~{dur}) | {ch_info if chs else '1 section'}"
|
| 257 |
except Exception as e:
|
| 258 |
return "", f"Error: {e}"
|
| 259 |
|
|
|
|
| 267 |
return chars, status
|
| 268 |
|
| 269 |
|
| 270 |
+
def get_chapter_text(text: str, chapter_sel: str) -> str:
    """Return the text of the chapter named by a chapter-selector value.

    Args:
        text: The full story text.
        chapter_sel: Selector value. ``"All"`` or any falsy value means the
            whole text; otherwise the part before the first ``:`` is parsed
            as ``Ch<N>`` with a 1-based chapter number.

    Returns:
        The chapter text from the pipeline's ``get_chapter_text``, or
        ``text`` unchanged when there is no text, no specific chapter is
        selected, the label cannot be parsed, or the pipeline lookup fails.
    """
    if not text or chapter_sel == "All" or not chapter_sel:
        return text

    # Convert the 1-based chapter number in the label to a 0-based index.
    try:
        idx = int(chapter_sel.split(":")[0].replace("Ch", "")) - 1
    except (AttributeError, TypeError, ValueError):
        return text
    if idx < 0:
        # "Ch0" (or a negative number) is not a valid chapter; never hand a
        # negative index to the pipeline.
        return text

    try:
        return get_pipeline().get_chapter_text(text, idx)
    except Exception:
        # Best-effort: fall back to the full text, but leave a trace so the
        # failed chapter lookup is visible in the logs.
        import traceback
        traceback.print_exc()
        return text
|
| 279 |
|
| 280 |
|
| 281 |
# ---------------------------------------------------------------------------
|
|
|
|
| 286 |
def generate_audiobook_gpu(
|
| 287 |
text,
|
| 288 |
nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 289 |
+
gen_temp, gen_seed, output_fmt, *args
|
|
|
|
| 290 |
):
|
| 291 |
if not text or len(text.strip()) < 50:
|
| 292 |
+
return None, "", "Error: Please provide at least 50 characters of story text.", ""
|
| 293 |
+
|
| 294 |
+
# Unpack character args (80 values = 8 chars x 10 fields)
|
| 295 |
+
names = list(args[0:8])
|
| 296 |
+
descs = list(args[8:16])
|
| 297 |
+
modes = list(args[16:24])
|
| 298 |
+
presets = list(args[24:32])
|
| 299 |
+
audios = list(args[32:40])
|
| 300 |
+
ref_texts = list(args[40:48])
|
| 301 |
+
designs = list(args[48:56])
|
| 302 |
+
instructs = list(args[56:64])
|
| 303 |
+
langs = list(args[64:72])
|
| 304 |
+
speeds = list(args[72:80])
|
| 305 |
|
| 306 |
pipe = get_pipeline()
|
| 307 |
|
|
|
|
| 314 |
design_desc=nar_design if nar_mode == "design" else None,
|
| 315 |
instruct=nar_instruct,
|
| 316 |
language=nar_lang,
|
| 317 |
+
speed=float(nar_speed) if nar_speed else 1.0,
|
| 318 |
)
|
| 319 |
|
| 320 |
char_configs = {}
|
|
|
|
| 350 |
temperature=gen_temp,
|
| 351 |
seed=int(gen_seed),
|
| 352 |
)
|
|
|
|
|
|
|
|
|
|
| 353 |
|
|
|
|
| 354 |
seg_html = "<div style='max-height: 300px; overflow-y: auto;'>"
|
| 355 |
for s in seg_meta[:50]:
|
| 356 |
tclass = "narration" if s['type'] == 'narration' else "dialogue"
|
|
|
|
| 359 |
seg_html += f"<div style='text-align:center;color:#94a3b8;padding:0.5rem;'>... and {len(seg_meta)-50} more segments</div>"
|
| 360 |
seg_html += "</div>"
|
| 361 |
|
|
|
|
| 362 |
extra_path = None
|
| 363 |
if output_fmt == "wav":
|
| 364 |
extra_path = output_path.replace(".mp3", ".wav")
|
|
|
|
| 387 |
design_desc=design if mode == "design" else None,
|
| 388 |
instruct=instruct,
|
| 389 |
language=lang,
|
| 390 |
+
speed=float(speed) if speed else 1.0,
|
| 391 |
)
|
| 392 |
try:
|
| 393 |
wav, sr = pipe.preview_voice(vc)
|
|
|
|
| 422 |
return None, f"Preview failed: {e}"
|
| 423 |
|
| 424 |
|
| 425 |
+
# ---------------------------------------------------------------------------
|
| 426 |
+
# Quick Generate
|
| 427 |
+
# ---------------------------------------------------------------------------
|
| 428 |
+
|
| 429 |
+
@spaces.GPU(duration=180)
def quick_generate_gpu(text, narrator_preset, gen_temp, output_fmt, gen_seed=42):
    """Generate an audiobook from ``text`` with a single preset narrator voice.

    Args:
        text: Story text; rejected when empty or shorter than 50 characters
            after stripping surrounding whitespace.
        narrator_preset: Preset voice name used for the narrator.
        gen_temp: Passed to the pipeline as ``temperature``.
        output_fmt: ``"wav"`` re-exports the segments as a WAV file next to
            the pipeline output; ``"zip"`` exports the segments as a zip;
            any other value returns the pipeline's own output file.
        gen_seed: Converted with ``int`` and passed to the pipeline as
            ``seed``.

    Returns:
        A ``(path, status)`` tuple. ``path`` is ``None`` when the text is too
        short or generation raised; ``status`` then carries the error text.
    """
    if not text or len(text.strip()) < 50:
        return None, "Error: Text too short."

    pipe = get_pipeline()
    nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
                          language="English", speed=1.0)

    def prog_cb(ratio: float, msg: str):
        print(f"[{ratio*100:.0f}%] {msg}")

    try:
        output_path, seg_paths, seg_meta = pipe.generate(
            text=text,
            narrator_config=nar_cfg,
            character_configs={},  # quick mode: narrator voice only
            progress_callback=prog_cb,
            temperature=gen_temp,
            seed=int(gen_seed),
        )

        extra_path = None
        if output_fmt == "wav":
            # Swap only the final suffix. A plain str.replace(".mp3", ".wav")
            # would also rewrite ".mp3" inside directory names and would leave
            # the path unchanged (overwriting the original) for other suffixes.
            extra_path = str(Path(output_path).with_suffix(".wav"))
            from backend import save_audiobook
            save_audiobook(seg_paths, extra_path, fmt="wav")
        elif output_fmt == "zip":
            extra_path = pipe.export_segments_zip(seg_paths)

        final_path = extra_path if extra_path else output_path
        return final_path, f"Quick audiobook ready! {len(seg_meta)} segments."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        import traceback
        traceback.print_exc()
        return None, f"Error: {str(e)}"
|
| 465 |
+
|
| 466 |
+
|
| 467 |
# ---------------------------------------------------------------------------
|
| 468 |
# Project Save/Load
|
| 469 |
# ---------------------------------------------------------------------------
|
| 470 |
|
| 471 |
+
def do_save_project(text, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed, *args):
|
| 472 |
+
# Unpack character args (80 values) + gen_temp + gen_seed
|
| 473 |
+
names = list(args[0:8])
|
| 474 |
+
descs = list(args[8:16])
|
| 475 |
+
modes = list(args[16:24])
|
| 476 |
+
presets = list(args[24:32])
|
| 477 |
+
audios = list(args[32:40])
|
| 478 |
+
ref_texts = list(args[40:48])
|
| 479 |
+
designs = list(args[48:56])
|
| 480 |
+
instructs = list(args[56:64])
|
| 481 |
+
langs = list(args[64:72])
|
| 482 |
+
speeds = list(args[72:80])
|
| 483 |
+
gen_temp = args[80] if len(args) > 80 else 0.7
|
| 484 |
+
gen_seed = args[81] if len(args) > 81 else 42
|
| 485 |
+
|
| 486 |
nar_cfg = VoiceConfig(
|
| 487 |
name="Narrator", mode=nar_mode, preset=nar_preset if nar_mode == "preset" else None,
|
| 488 |
ref_audio=nar_audio if nar_mode == "clone" and nar_audio else None,
|
| 489 |
ref_text=nar_ref_text if nar_mode == "clone" else None,
|
| 490 |
design_desc=nar_design if nar_mode == "design" else None,
|
| 491 |
+
instruct=nar_instruct, language=nar_lang,
|
| 492 |
+
speed=float(nar_speed) if nar_speed else 1.0,
|
| 493 |
)
|
| 494 |
char_configs = {}
|
| 495 |
for i in range(8):
|
|
|
|
| 515 |
nar = data["narrator"]
|
| 516 |
chars = data.get("characters", {})
|
| 517 |
|
|
|
|
| 518 |
nar_updates = [
|
| 519 |
gr.update(value=nar.mode),
|
| 520 |
gr.update(value=nar.preset if nar.preset else "Ryan", visible=nar.mode=="preset"),
|
|
|
|
| 526 |
gr.update(value=nar.speed),
|
| 527 |
]
|
| 528 |
|
|
|
|
| 529 |
char_updates = []
|
| 530 |
char_items = list(chars.items())[:8]
|
| 531 |
for i in range(8):
|
|
|
|
| 543 |
gr.update(value=c.instruct, visible=True),
|
| 544 |
gr.update(value=c.language, visible=True),
|
| 545 |
gr.update(value=c.speed, visible=True),
|
| 546 |
+
gr.update(visible=True),
|
| 547 |
+
gr.update(visible=True),
|
| 548 |
])
|
| 549 |
else:
|
| 550 |
char_updates.extend([
|
|
|
|
| 559 |
gr.update(visible=False),
|
| 560 |
gr.update(visible=False),
|
| 561 |
gr.update(visible=False),
|
| 562 |
+
gr.update(visible=False),
|
| 563 |
+
gr.update(visible=False),
|
| 564 |
])
|
| 565 |
|
| 566 |
text_sample = data.get("text_sample", "")
|
| 567 |
return [text_sample] + nar_updates + char_updates + [f"Project loaded! {len(chars)} characters configured."]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
except Exception as e:
|
| 569 |
import traceback
|
| 570 |
traceback.print_exc()
|
| 571 |
+
return [""] + [gr.update()] * 8 + [gr.update(visible=False)] * 104 + [f"Error loading project: {e}"]
|
| 572 |
|
| 573 |
|
| 574 |
# ---------------------------------------------------------------------------
|
|
|
|
| 650 |
quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
|
| 651 |
quick_status = gr.Textbox(show_label=False, interactive=False)
|
| 652 |
gr.Markdown("---")
|
| 653 |
+
gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text.")
|
| 654 |
|
| 655 |
with gr.Row():
|
| 656 |
chapter_selector = gr.Dropdown(
|
|
|
|
| 678 |
outputs=[quick_audio, quick_status],
|
| 679 |
)
|
| 680 |
|
|
|
|
| 681 |
def refresh_chapters(text):
|
| 682 |
if not text:
|
| 683 |
return gr.update(choices=["All"], value="All")
|
|
|
|
| 715 |
|
| 716 |
with gr.Column(scale=2):
|
| 717 |
gr.Markdown("## Character Voices")
|
| 718 |
+
gr.Markdown("Configure up to 8 characters. Use **preset** for built-in speakers, **clone** to upload a voice sample, or **design** to describe a voice from text.")
|
| 719 |
|
| 720 |
char_names, char_descs, char_modes, char_presets = [], [], [], []
|
| 721 |
char_audios, char_ref_texts, char_designs, char_instructs, char_langs, char_speeds = [], [], [], [], [], []
|
| 722 |
+
char_rows, char_preview_btns, char_preview_audios, char_preview_statuses = [], [], [], []
|
| 723 |
|
| 724 |
for i in range(8):
|
| 725 |
visible_default = (i == 0)
|
|
|
|
| 739 |
with gr.Row():
|
| 740 |
cpv_btn = gr.Button("🔊 Preview", variant="secondary", visible=visible_default)
|
| 741 |
cpv_audio = gr.Audio(label="Preview", interactive=False, visible=visible_default)
|
| 742 |
+
cpv_status = gr.Textbox(show_label=False, interactive=False, visible=visible_default)
|
| 743 |
|
| 744 |
cm.change(on_mode_change, inputs=cm, outputs=[cp, ca, crt, cdes])
|
| 745 |
cpv_btn.click(
|
| 746 |
preview_char_voice_gpu,
|
| 747 |
inputs=[cn, cm, cp, ca, crt, cdes, cinstr, cl, cspd],
|
| 748 |
+
outputs=[cpv_audio, cpv_status],
|
| 749 |
)
|
| 750 |
|
| 751 |
char_rows.append(row)
|
|
|
|
| 761 |
char_speeds.append(cspd)
|
| 762 |
char_preview_btns.append(cpv_btn)
|
| 763 |
char_preview_audios.append(cpv_audio)
|
| 764 |
+
char_preview_statuses.append(cpv_status)
|
| 765 |
|
| 766 |
# ==================== TAB 3: Generate ====================
|
| 767 |
with gr.TabItem("⚡ Generate"):
|
|
|
|
| 847 |
gr.update(value=chars[i].get("speed", 1.0), visible=True),
|
| 848 |
gr.update(visible=True),
|
| 849 |
gr.update(visible=True),
|
| 850 |
+
gr.update(visible=True),
|
| 851 |
])
|
| 852 |
else:
|
| 853 |
updates.extend([
|
|
|
|
| 864 |
gr.update(visible=False),
|
| 865 |
gr.update(visible=False),
|
| 866 |
gr.update(visible=False),
|
| 867 |
+
gr.update(visible=False),
|
| 868 |
])
|
| 869 |
return [status] + updates
|
| 870 |
|
|
|
|
| 872 |
item for sublist in [
|
| 873 |
[char_rows[i], char_names[i], char_descs[i], char_modes[i], char_presets[i],
|
| 874 |
char_audios[i], char_ref_texts[i], char_designs[i], char_instructs[i], char_langs[i],
|
| 875 |
+
char_speeds[i], char_preview_btns[i], char_preview_audios[i], char_preview_statuses[i]]
|
| 876 |
for i in range(8)
|
| 877 |
] for item in sublist
|
| 878 |
]
|
|
|
|
| 884 |
char_audios + char_ref_texts + char_designs + char_instructs + char_langs + char_speeds
|
| 885 |
)
|
| 886 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 887 |
gen_inputs = [
|
| 888 |
story_input, chapter_selector,
|
| 889 |
nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 890 |
gen_temp, gen_seed, output_fmt,
|
| 891 |
] + all_char_inputs
|
| 892 |
|
| 893 |
+
def wrapped_generate(story_text, chapter_sel, *args):
    """Resolve the chapter selection to text, then run full generation.

    The first two inputs are reduced to a single text via
    ``get_chapter_text``; every remaining positional input is forwarded
    unchanged to ``generate_audiobook_gpu``.
    """
    return generate_audiobook_gpu(get_chapter_text(story_text, chapter_sel), *args)
|
| 896 |
+
|
| 897 |
gen_btn.click(
|
| 898 |
wrapped_generate,
|
| 899 |
inputs=gen_inputs,
|
|
|
|
| 901 |
)
|
| 902 |
|
| 903 |
# ---------- Project wiring ----------
|
| 904 |
+
save_inputs = [
|
| 905 |
+
story_input,
|
| 906 |
+
nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
|
| 907 |
+
] + all_char_inputs + [gen_temp, gen_seed]
|
| 908 |
+
save_btn.click(do_save_project, inputs=save_inputs, outputs=[project_json])
|
|
|
|
|
|
|
|
|
|
| 909 |
|
| 910 |
+
load_outputs = [story_input, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed] + extract_outputs[1:] + [load_status]
|
| 911 |
+
load_btn.click(do_load_project, inputs=[load_json], outputs=load_outputs)
|
|
|
|
|
|
|
|
|
|
| 912 |
|
| 913 |
return demo
|
| 914 |
|