Quick Generate: add full mode selector (preset/clone/design) for narrator voice on Story tab.
Browse files
app.py
CHANGED
|
@@ -449,13 +449,22 @@ def preview_char_voice_gpu(name, mode, preset, audio, ref_text, design, instruct
|
|
| 449 |
# ---------------------------------------------------------------------------
|
| 450 |
|
| 451 |
@spaces.GPU(duration=180)
|
| 452 |
-
def quick_generate_gpu(text,
|
| 453 |
if not text or len(text.strip()) < 50:
|
| 454 |
return None, "Error: Text too short."
|
| 455 |
|
| 456 |
pipe = get_pipeline()
|
| 457 |
-
nar_cfg = VoiceConfig(
|
| 458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
|
| 460 |
def prog_cb(ratio: float, msg: str):
|
| 461 |
print(f"[{ratio*100:.0f}%] {msg}")
|
|
@@ -659,22 +668,21 @@ def build_app():
|
|
| 659 |
stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
|
| 660 |
gr.Markdown("---")
|
| 661 |
gr.Markdown("### Quick Generate")
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
)
|
|
|
|
|
|
|
|
|
|
| 667 |
quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
|
| 668 |
-
quick_fmt = gr.Dropdown(
|
| 669 |
-
choices=["mp3", "wav", "zip"],
|
| 670 |
-
value="mp3",
|
| 671 |
-
label="Output Format",
|
| 672 |
-
)
|
| 673 |
quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
|
| 674 |
-
|
| 675 |
quick_status = gr.Textbox(show_label=False, interactive=False)
|
| 676 |
gr.Markdown("---")
|
| 677 |
-
gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text.")
|
| 678 |
|
| 679 |
with gr.Row():
|
| 680 |
chapter_selector = gr.Dropdown(
|
|
@@ -698,10 +706,12 @@ def build_app():
|
|
| 698 |
story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
|
| 699 |
quick_btn.click(
|
| 700 |
quick_generate_gpu,
|
| 701 |
-
inputs=[story_input, quick_preset, quick_temp, quick_fmt],
|
| 702 |
-
outputs=[
|
| 703 |
)
|
| 704 |
|
|
|
|
|
|
|
| 705 |
def refresh_chapters(text):
|
| 706 |
if not text:
|
| 707 |
return gr.update(choices=["All"], value="All")
|
|
|
|
| 449 |
# ---------------------------------------------------------------------------
|
| 450 |
|
| 451 |
@spaces.GPU(duration=180)
|
| 452 |
+
def quick_generate_gpu(text, mode, preset, audio, ref_text, design, instruct, lang, speed, gen_temp, output_fmt, gen_seed=42):
|
| 453 |
if not text or len(text.strip()) < 50:
|
| 454 |
return None, "Error: Text too short."
|
| 455 |
|
| 456 |
pipe = get_pipeline()
|
| 457 |
+
nar_cfg = VoiceConfig(
|
| 458 |
+
name="Narrator",
|
| 459 |
+
mode=mode,
|
| 460 |
+
preset=preset if mode == "preset" else None,
|
| 461 |
+
ref_audio=audio if mode == "clone" and audio else None,
|
| 462 |
+
ref_text=ref_text if mode == "clone" else None,
|
| 463 |
+
design_desc=design if mode == "design" else None,
|
| 464 |
+
instruct=instruct or "Narrate clearly and expressively.",
|
| 465 |
+
language=lang,
|
| 466 |
+
speed=float(speed) if speed else 1.0,
|
| 467 |
+
)
|
| 468 |
|
| 469 |
def prog_cb(ratio: float, msg: str):
|
| 470 |
print(f"[{ratio*100:.0f}%] {msg}")
|
|
|
|
| 668 |
stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
|
| 669 |
gr.Markdown("---")
|
| 670 |
gr.Markdown("### Quick Generate")
|
| 671 |
+
quick_mode = gr.Dropdown(choices=["preset", "clone", "design"], value="design", label="Narrator Mode")
|
| 672 |
+
quick_preset = gr.Dropdown(choices=list(PRESET_SPEAKERS.keys()), value="Ryan", label="Preset Voice", visible=False)
|
| 673 |
+
quick_audio = gr.Audio(label="Upload Voice Sample (3–10s)", type="filepath", visible=False)
|
| 674 |
+
quick_ref_text = gr.Textbox(label="Reference Transcript", placeholder="What does the sample say?", visible=False)
|
| 675 |
+
quick_design = gr.TextArea(label="Voice Description", placeholder="e.g. A warm, raspy baritone with measured pacing...", visible=True, lines=2, value="A clear, warm, expressive audiobook narrator voice with professional pacing and rich tone.")
|
| 676 |
+
quick_instruct = gr.Textbox(label="Style Instruction", placeholder="e.g. Calm, measured storytelling.", value="")
|
| 677 |
+
quick_lang = gr.Dropdown(choices=["English", "Chinese", "Japanese", "Korean", "German", "French", "Spanish", "Italian", "Portuguese", "Russian"], value="English", label="Language")
|
| 678 |
+
quick_speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
|
| 679 |
quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
|
| 680 |
+
quick_fmt = gr.Dropdown(choices=["mp3", "wav", "zip"], value="mp3", label="Output Format")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
|
| 682 |
+
quick_output_audio = gr.Audio(label="Quick Audiobook", interactive=False)
|
| 683 |
quick_status = gr.Textbox(show_label=False, interactive=False)
|
| 684 |
gr.Markdown("---")
|
| 685 |
+
gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text. Supports preset, clone, or AI-designed voices.")
|
| 686 |
|
| 687 |
with gr.Row():
|
| 688 |
chapter_selector = gr.Dropdown(
|
|
|
|
| 706 |
story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
|
| 707 |
quick_btn.click(
|
| 708 |
quick_generate_gpu,
|
| 709 |
+
inputs=[story_input, quick_mode, quick_preset, quick_audio, quick_ref_text, quick_design, quick_instruct, quick_lang, quick_speed, quick_temp, quick_fmt],
|
| 710 |
+
outputs=[quick_output_audio, quick_status],
|
| 711 |
)
|
| 712 |
|
| 713 |
+
quick_mode.change(on_mode_change, inputs=quick_mode, outputs=[quick_preset, quick_audio, quick_ref_text, quick_design])
|
| 714 |
+
|
| 715 |
def refresh_chapters(text):
|
| 716 |
if not text:
|
| 717 |
return gr.update(choices=["All"], value="All")
|