Spaces:

Nymbo
/

Tools

Running

App Files Community

Nymbo committed 16 days ago

Commit

f76e04a

verified ·

1 Parent(s): 193b0d8

Update Modules/Generate_Speech.py

Browse files

Files changed (1) hide show

Modules/Generate_Speech.py +27 -80

Modules/Generate_Speech.py CHANGED Viewed

@@ -643,88 +643,35 @@ def _generate_supertonic(text: str, speed: float, voice: str, steps: int, silenc
     return output_path
-def build_interface() -> gr.Blocks:
     kokoro_voices = get_kokoro_voices()
     supertonic_voices = get_supertonic_voices()
-    with gr.Blocks(title="Generate Speech") as demo:
-        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Generate Speech</h1>")
-        gr.Markdown("<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>")
-        with gr.Row():
-            with gr.Column():
-                text_input = gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4)
-                model_dropdown = gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic")
-                # Voice dropdown needs to update based on model
-                voice_dropdown = gr.Dropdown(
-                    label="Voice",
-                    choices=supertonic_voices,
-                    value="F1",
-                    info="Select voice"
-                )
-                speed_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed")
-                # Supertonic specific
-                with gr.Group() as supertonic_params:
-                    steps_slider = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only")
-                    silence_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only")
-                    chunk_slider = gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only")
-                with gr.Row():
-                    clear_btn = gr.Button("Clear")
-                    gen_btn = gr.Button("Generate", variant="primary")
-            with gr.Column():
-                audio_output = gr.Audio(label="Audio", type="filepath", format="wav")
-        def update_voices(model_name):
-            if model_name == "Kokoro":
-                return {
-                    voice_dropdown: gr.Dropdown(choices=kokoro_voices, value="af_heart"),
-                    supertonic_params: gr.Group(visible=False)
-                }
-            else:
-                return {
-                    voice_dropdown: gr.Dropdown(choices=supertonic_voices, value="F1"),
-                    supertonic_params: gr.Group(visible=True)
-                }
-        def clear_inputs():
-            return [
-                "",           # text_input
-                "Supertonic", # model_dropdown
-                "F1",         # voice_dropdown
-                1.3,          # speed_slider
-                5,            # steps_slider
-                0.3,          # silence_slider
-                300,          # chunk_slider
-                None          # audio_output
-            ]
-        clear_btn.click(
-            fn=clear_inputs,
-            inputs=[],
-            outputs=[text_input, model_dropdown, voice_dropdown, speed_slider, steps_slider, silence_slider, chunk_slider, audio_output]
-        )
-        model_dropdown.change(
-            fn=update_voices,
-            inputs=[model_dropdown],
-            outputs=[voice_dropdown, supertonic_params]
-        )
-        gen_btn.click(
-            fn=Generate_Speech,
-            inputs=[text_input, model_dropdown, speed_slider, voice_dropdown, steps_slider, silence_slider, chunk_slider],
-            outputs=[audio_output]
-        )
-        # Expose the function for API
-        demo.fn = Generate_Speech
-    return demo
 __all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]

     return output_path
+def build_interface() -> gr.Interface:
     kokoro_voices = get_kokoro_voices()
     supertonic_voices = get_supertonic_voices()
+    all_voices = sorted(list(set(kokoro_voices + supertonic_voices)))
+    return gr.Interface(
+        fn=Generate_Speech,
+        inputs=[
+            gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
+            gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
+            gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
+            gr.Dropdown(
+                label="Voice",
+                choices=all_voices,
+                value="F1",
+                info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
+            ),
+            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps (Supertonic only)"),
+            gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration (Supertonic only)"),
+            gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size (Supertonic only)"),
+        ],
+        outputs=gr.Audio(label="Audio", type="filepath", format="wav"),
+        title="Generate Speech",
+        description=(
+            "<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>"
+        ),
+        api_description=TOOL_SUMMARY,
+        flagging_mode="never",
+    )
 __all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]