Spaces:

jkorstad
/

AudioBook

Running on Zero

App Files Community

jkorstad committed on Apr 23

Commit

e68879f

1 Parent(s): 70f6a37

Quick Generate: add full mode selector (preset/clone/design) for narrator voice on Story tab.

Browse files

Files changed (1) hide show

app.py +27 -17

app.py CHANGED Viewed

@@ -449,13 +449,22 @@ def preview_char_voice_gpu(name, mode, preset, audio, ref_text, design, instruct
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=180)
-def quick_generate_gpu(text, narrator_preset, gen_temp, output_fmt, gen_seed=42):
     if not text or len(text.strip()) < 50:
         return None, "Error: Text too short."
     pipe = get_pipeline()
-    nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
-                          language="English", speed=1.0)
     def prog_cb(ratio: float, msg: str):
         print(f"[{ratio*100:.0f}%] {msg}")
@@ -659,22 +668,21 @@ def build_app():
                             stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
                         gr.Markdown("---")
                         gr.Markdown("### Quick Generate")
-                        quick_preset = gr.Dropdown(
-                            choices=list(PRESET_SPEAKERS.keys()),
-                            value="Ryan",
-                            label="Narrator Voice",
-                        )
                         quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
-                        quick_fmt = gr.Dropdown(
-                            choices=["mp3", "wav", "zip"],
-                            value="mp3",
-                            label="Output Format",
-                        )
                         quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
-                        quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
                         quick_status = gr.Textbox(show_label=False, interactive=False)
                         gr.Markdown("---")
-                        gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text.")
                 with gr.Row():
                     chapter_selector = gr.Dropdown(
@@ -698,10 +706,12 @@ def build_app():
                 story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
                 quick_btn.click(
                     quick_generate_gpu,
-                    inputs=[story_input, quick_preset, quick_temp, quick_fmt],
-                    outputs=[quick_audio, quick_status],
                 )
                 def refresh_chapters(text):
                     if not text:
                         return gr.update(choices=["All"], value="All")

 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=180)
+def quick_generate_gpu(text, mode, preset, audio, ref_text, design, instruct, lang, speed, gen_temp, output_fmt, gen_seed=42):
     if not text or len(text.strip()) < 50:
         return None, "Error: Text too short."
     pipe = get_pipeline()
+    nar_cfg = VoiceConfig(
+        name="Narrator",
+        mode=mode,
+        preset=preset if mode == "preset" else None,
+        ref_audio=audio if mode == "clone" and audio else None,
+        ref_text=ref_text if mode == "clone" else None,
+        design_desc=design if mode == "design" else None,
+        instruct=instruct or "Narrate clearly and expressively.",
+        language=lang,
+        speed=float(speed) if speed else 1.0,
+    )
     def prog_cb(ratio: float, msg: str):
         print(f"[{ratio*100:.0f}%] {msg}")
                             stat_dur = gr.Textbox(label="Est. Duration", value="0 sec", interactive=False)
                         gr.Markdown("---")
                         gr.Markdown("### Quick Generate")
+                        quick_mode = gr.Dropdown(choices=["preset", "clone", "design"], value="design", label="Narrator Mode")
+                        quick_preset = gr.Dropdown(choices=list(PRESET_SPEAKERS.keys()), value="Ryan", label="Preset Voice", visible=False)
+                        quick_audio = gr.Audio(label="Upload Voice Sample (3–10s)", type="filepath", visible=False)
+                        quick_ref_text = gr.Textbox(label="Reference Transcript", placeholder="What does the sample say?", visible=False)
+                        quick_design = gr.TextArea(label="Voice Description", placeholder="e.g. A warm, raspy baritone with measured pacing...", visible=True, lines=2, value="A clear, warm, expressive audiobook narrator voice with professional pacing and rich tone.")
+                        quick_instruct = gr.Textbox(label="Style Instruction", placeholder="e.g. Calm, measured storytelling.", value="")
+                        quick_lang = gr.Dropdown(choices=["English", "Chinese", "Japanese", "Korean", "German", "French", "Spanish", "Italian", "Portuguese", "Russian"], value="English", label="Language")
+                        quick_speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
                         quick_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
+                        quick_fmt = gr.Dropdown(choices=["mp3", "wav", "zip"], value="mp3", label="Output Format")
                         quick_btn = gr.Button("⚡ Quick Generate", variant="primary")
+                        quick_output_audio = gr.Audio(label="Quick Audiobook", interactive=False)
                         quick_status = gr.Textbox(show_label=False, interactive=False)
                         gr.Markdown("---")
+                        gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text. Supports preset, clone, or AI-designed voices.")
                 with gr.Row():
                     chapter_selector = gr.Dropdown(
                 story_input.change(update_stats, inputs=[story_input], outputs=[stat_words, stat_dur])
                 quick_btn.click(
                     quick_generate_gpu,
+                    inputs=[story_input, quick_mode, quick_preset, quick_audio, quick_ref_text, quick_design, quick_instruct, quick_lang, quick_speed, quick_temp, quick_fmt],
+                    outputs=[quick_output_audio, quick_status],
                 )
+                quick_mode.change(on_mode_change, inputs=quick_mode, outputs=[quick_preset, quick_audio, quick_ref_text, quick_design])
                 def refresh_chapters(text):
                     if not text:
                         return gr.update(choices=["All"], value="All")