Nymbo committed on
Commit
84fbcc2
·
verified ·
1 Parent(s): 195b607

Update Modules/Generate_Speech.py

Browse files
Files changed (1) hide show
  1. Modules/Generate_Speech.py +6 -6
Modules/Generate_Speech.py CHANGED
@@ -508,8 +508,8 @@ def List_Supertonic_Voices() -> list[str]:
508
  # Single source of truth for the LLM-facing tool description
509
  TOOL_SUMMARY = (
510
  "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
511
- "Supertonic: faster, supports steps/silence/chunking. Default voice 'F1'. "
512
- "Kokoro: slower, supports many languages/accents. Default voice 'af_heart'. "
513
  "Return the generated media to the user in this format `![Alt text](URL)`."
514
  )
515
 
@@ -521,10 +521,10 @@ def Generate_Speech(
521
  text: Annotated[str, "The text to synthesize (English)."],
522
  model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
523
  speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
 
524
  voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
525
- steps: Annotated[int, "Diffusion steps for Supertonic (1-50). Higher = better quality but slower. Ignored for Kokoro."] = 5,
526
- silence_duration: Annotated[float, "Silence duration between chunks for Supertonic (0.0-2.0s). Ignored for Kokoro."] = 0.3,
527
- max_chunk_size: Annotated[int, "Max text chunk length for Supertonic (50-1000). Ignored for Kokoro."] = 300,
528
  ) -> str:
529
  _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
530
 
@@ -654,13 +654,13 @@ def build_interface() -> gr.Interface:
654
  gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
655
  gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
656
  gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
 
657
  gr.Dropdown(
658
  label="Voice",
659
  choices=all_voices,
660
  value="F1",
661
  info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
662
  ),
663
- gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
664
  gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
665
  gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
666
  ],
 
508
  # Single source of truth for the LLM-facing tool description
509
  TOOL_SUMMARY = (
510
  "Synthesize speech from text using Supertonic-66M (default) or Kokoro-82M. "
511
+ "Supertonic: faster, supports steps/silence/chunking. "
512
+ "Kokoro: slower, supports many languages/accents. "
513
  "Return the generated media to the user in this format `![Alt text](URL)`."
514
  )
515
 
 
521
  text: Annotated[str, "The text to synthesize (English)."],
522
  model: Annotated[str, "The TTS model to use: 'Supertonic' or 'Kokoro'."] = "Supertonic",
523
  speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.3,
524
+ steps: Annotated[int, "Supertonic only. Diffusion steps (1-50). Higher = better quality but slower."] = 5,
525
  voice: Annotated[str, "Voice identifier. Default 'F1' for Supertonic, 'af_heart' for Kokoro."] = "F1",
526
+ silence_duration: Annotated[float, "Supertonic only. Silence duration between chunks (0.0-2.0s)."] = 0.3,
527
+ max_chunk_size: Annotated[int, "Supertonic only. Max text chunk length (50-1000)."] = 300,
 
528
  ) -> str:
529
  _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), model=model, speed=speed, voice=voice)
530
 
 
654
  gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
655
  gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
656
  gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
657
+ gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only"),
658
  gr.Dropdown(
659
  label="Voice",
660
  choices=all_voices,
661
  value="F1",
662
  info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
663
  ),
 
664
  gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only"),
665
  gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only"),
666
  ],