jkorstad committed on
Commit
e68879f
·
1 Parent(s): 70f6a37

Quick Generate: add full mode selector (preset/clone/design) for narrator voice on Story tab.

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -449,13 +449,22 @@ def preview_char_voice_gpu(name, mode, preset, audio, ref_text, design, instruct
449
  # ---------------------------------------------------------------------------
450
 
451
  @spaces.GPU(duration=180)
452
- def quick_generate_gpu(text, narrator_preset, gen_temp, output_fmt, gen_seed=42):
453
  if not text or len(text.strip()) < 50:
454
  return None, "Error: Text too short."
455
 
456
  pipe = get_pipeline()
457
- nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
458
- language="English", speed=1.0)
 
 
 
 
 
 
 
 
 
459
 
460
  def prog_cb(ratio: float, msg: str):
461
  print(f"[{ratio*100:.0f}%] {msg}")
@@ -659,22 +668,21 @@ def build_app():
659
  stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
660
  gr.Markdown("---")
661
  gr.Markdown("### Quick Generate")
662
- quick_preset = gr.Dropdown(
663
- choices=list(PRESET_SPEAKERS.keys()),
664
- value="Ryan",
665
- label="Narrator Voice",
666
- )
 
 
 
667
  quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
668
- quick_fmt = gr.Dropdown(
669
- choices=["mp3", "wav", "zip"],
670
- value="mp3",
671
- label="Output Format",
672
- )
673
  quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
674
- quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
675
  quick_status = gr.Textbox(show_label=False, interactive=False)
676
  gr.Markdown("---")
677
- gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text.")
678
 
679
  with gr.Row():
680
  chapter_selector = gr.Dropdown(
@@ -698,10 +706,12 @@ def build_app():
698
  story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
699
  quick_btn.click(
700
  quick_generate_gpu,
701
- inputs=[story_input, quick_preset, quick_temp, quick_fmt],
702
- outputs=[quick_audio, quick_status],
703
  )
704
 
 
 
705
  def refresh_chapters(text):
706
  if not text:
707
  return gr.update(choices=["All"], value="All")
 
449
  # ---------------------------------------------------------------------------
450
 
451
  @spaces.GPU(duration=180)
452
+ def quick_generate_gpu(text, mode, preset, audio, ref_text, design, instruct, lang, speed, gen_temp, output_fmt, gen_seed=42):
453
  if not text or len(text.strip()) < 50:
454
  return None, "Error: Text too short."
455
 
456
  pipe = get_pipeline()
457
+ nar_cfg = VoiceConfig(
458
+ name="Narrator",
459
+ mode=mode,
460
+ preset=preset if mode == "preset" else None,
461
+ ref_audio=audio if mode == "clone" and audio else None,
462
+ ref_text=ref_text if mode == "clone" else None,
463
+ design_desc=design if mode == "design" else None,
464
+ instruct=instruct or "Narrate clearly and expressively.",
465
+ language=lang,
466
+ speed=float(speed) if speed else 1.0,
467
+ )
468
 
469
  def prog_cb(ratio: float, msg: str):
470
  print(f"[{ratio*100:.0f}%] {msg}")
 
668
  stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
669
  gr.Markdown("---")
670
  gr.Markdown("### Quick Generate")
671
+ quick_mode = gr.Dropdown(choices=["preset", "clone", "design"], value="design", label="Narrator Mode")
672
+ quick_preset = gr.Dropdown(choices=list(PRESET_SPEAKERS.keys()), value="Ryan", label="Preset Voice", visible=False)
673
+ quick_audio = gr.Audio(label="Upload Voice Sample (3–10s)", type="filepath", visible=False)
674
+ quick_ref_text = gr.Textbox(label="Reference Transcript", placeholder="What does the sample say?", visible=False)
675
+ quick_design = gr.TextArea(label="Voice Description", placeholder="e.g. A warm, raspy baritone with measured pacing...", visible=True, lines=2, value="A clear, warm, expressive audiobook narrator voice with professional pacing and rich tone.")
676
+ quick_instruct = gr.Textbox(label="Style Instruction", placeholder="e.g. Calm, measured storytelling.", value="")
677
+ quick_lang = gr.Dropdown(choices=["English", "Chinese", "Japanese", "Korean", "German", "French", "Spanish", "Italian", "Portuguese", "Russian"], value="English", label="Language")
678
+ quick_speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
679
  quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
680
+ quick_fmt = gr.Dropdown(choices=["mp3", "wav", "zip"], value="mp3", label="Output Format")
 
 
 
 
681
  quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
682
+ quick_output_audio = gr.Audio(label="Quick Audiobook", interactive=False)
683
  quick_status = gr.Textbox(show_label=False, interactive=False)
684
  gr.Markdown("---")
685
+ gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text. Supports preset, clone, or AI-designed voices.")
686
 
687
  with gr.Row():
688
  chapter_selector = gr.Dropdown(
 
706
  story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
707
  quick_btn.click(
708
  quick_generate_gpu,
709
+ inputs=[story_input, quick_mode, quick_preset, quick_audio, quick_ref_text, quick_design, quick_instruct, quick_lang, quick_speed, quick_temp, quick_fmt],
710
+ outputs=[quick_output_audio, quick_status],
711
  )
712
 
713
+ quick_mode.change(on_mode_change, inputs=quick_mode, outputs=[quick_preset, quick_audio, quick_ref_text, quick_design])
714
+
715
  def refresh_chapters(text):
716
  if not text:
717
  return gr.update(choices=["All"], value="All")