Update Modules/Generate_Speech.py
Browse files
Modules/Generate_Speech.py
CHANGED
|
@@ -508,8 +508,8 @@ def List_Supertonic_Voices() -> list[str]:
|
|
| 508 |
# Single source of truth for the LLM-facing tool description
|
| 509 |
TOOL_SUMMARY = (
|
| 510 |
"Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
|
| 511 |
-
"Supertonic: faster, supports steps/silence/chunking."
|
| 512 |
-
"Kokoro: slower, supports many languages/accents."
|
| 513 |
"Return the generated media to the user in this format ``."
|
| 514 |
)
|
| 515 |
|
|
@@ -521,10 +521,10 @@ def Generate_Speech(
|
|
| 521 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 522 |
model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
|
| 523 |
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
|
|
|
|
| 524 |
voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
max_chunk_size: Annotated[int, "Max text chunk length for Supertonic (50-1000). Ignored for Kokoro."] = 300,
|
| 528 |
) -> str:
|
| 529 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
|
| 530 |
|
|
@@ -654,13 +654,13 @@ def build_interface() -> gr.Interface:
|
|
| 654 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
| 655 |
gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
|
| 656 |
gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
|
|
|
|
| 657 |
gr.Dropdown(
|
| 658 |
label="Voice",
|
| 659 |
choices=all_voices,
|
| 660 |
value="F1",
|
| 661 |
info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
|
| 662 |
),
|
| 663 |
-
gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
|
| 664 |
gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
|
| 665 |
gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
|
| 666 |
],
|
|
|
|
| 508 |
# Single source of truth for the LLM-facing tool description
|
| 509 |
TOOL_SUMMARY = (
|
| 510 |
"Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
|
| 511 |
+
"Supertonic: faster, supports steps/silence/chunking. "
|
| 512 |
+
"Kokoro: slower, supports many languages/accents. "
|
| 513 |
"Return the generated media to the user in this format ``."
|
| 514 |
)
|
| 515 |
|
|
|
|
| 521 |
text: Annotated[str, "The text to synthesize (English)."],
|
| 522 |
model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
|
| 523 |
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
|
| 524 |
+
steps: Annotated[int, "Supertonic only. Diffusion steps (1-50). Higher = better quality but slower."] = 5,
|
| 525 |
voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
|
| 526 |
+
silence_duration: Annotated[float, "Supertonic only. Silence duration between chunks (0.0-2.0s)."] = 0.3,
|
| 527 |
+
max_chunk_size: Annotated[int, "Supertonic only. Max text chunk length (50-1000)."] = 300,
|
|
|
|
| 528 |
) -> str:
|
| 529 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
|
| 530 |
|
|
|
|
| 654 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
| 655 |
gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
|
| 656 |
gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
|
| 657 |
+
gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
|
| 658 |
gr.Dropdown(
|
| 659 |
label="Voice",
|
| 660 |
choices=all_voices,
|
| 661 |
value="F1",
|
| 662 |
info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
|
| 663 |
),
|
|
|
|
| 664 |
gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
|
| 665 |
gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
|
| 666 |
],
|