Spaces:

Nymbo
/

Tools

Running

App Files Community

Nymbo committed 16 days ago

Commit

84fbcc2

verified ·

1 Parent(s): 195b607

Update Modules/Generate_Speech.py

Browse files

Files changed (1) hide show

Modules/Generate_Speech.py +6 -6

Modules/Generate_Speech.py CHANGED Viewed

@@ -508,8 +508,8 @@ def List_Supertonic_Voices() -> list[str]:
 # Single source of truth for the LLM-facing tool description
 TOOL_SUMMARY = (
     "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
-    "Supertonic: faster, supports steps/silence/chunking. Default voice 'F1'. "
-    "Kokoro: slower, supports many languages/accents. Default voice 'af_heart'. "
     "Return the generated media to the user in this format `![Alt text](URL)`."
 )
@@ -521,10 +521,10 @@ def Generate_Speech(
     text: Annotated[str, "The text to synthesize (English)."],
     model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
     speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
     voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
-    steps: Annotated[int, "Diffusion steps for Supertonic (1-50). Higher = better quality but slower. Ignored for Kokoro."] = 5,
-    silence_duration: Annotated[float, "Silence duration between chunks for Supertonic (0.0-2.0s). Ignored for Kokoro."] = 0.3,
-    max_chunk_size: Annotated[int, "Max text chunk length for Supertonic (50-1000). Ignored for Kokoro."] = 300,
 ) -> str:
     _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
@@ -654,13 +654,13 @@ def build_interface() -> gr.Interface:
             gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
             gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
             gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
             gr.Dropdown(
                 label="Voice",
                 choices=all_voices,
                 value="F1",
                 info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
             ),
-            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
             gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
             gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
         ],

 # Single source of truth for the LLM-facing tool description
 TOOL_SUMMARY = (
     "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
+    "Supertonic: faster, supports steps/silence/chunking. "
+    "Kokoro: slower, supports many languages/accents. "
     "Return the generated media to the user in this format `![Alt text](URL)`."
 )
     text: Annotated[str, "The text to synthesize (English)."],
     model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
     speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
+    steps: Annotated[int, "Supertonic only. Diffusion steps (1-50). Higher = better quality but slower."] = 5,
     voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
+    silence_duration: Annotated[float, "Supertonic only. Silence duration between chunks (0.0-2.0s)."] = 0.3,
+    max_chunk_size: Annotated[int, "Supertonic only. Max text chunk length (50-1000)."] = 300,
 ) -> str:
     _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
             gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
             gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
             gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
+            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
             gr.Dropdown(
                 label="Voice",
                 choices=all_voices,
                 value="F1",
                 info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
             ),
             gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
             gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
         ],