Nymbo committed on
Commit
f76e04a
·
verified ·
1 Parent(s): 193b0d8

Update Modules/Generate_Speech.py

Browse files
Files changed (1) hide show
  1. Modules/Generate_Speech.py +27 -80
Modules/Generate_Speech.py CHANGED
@@ -643,88 +643,35 @@ def _generate_supertonic(text: str, speed: float, voice: str, steps: int, silenc
643
  return output_path
644
 
645
 
646
- def build_interface() -> gr.Blocks:
647
  kokoro_voices = get_kokoro_voices()
648
  supertonic_voices = get_supertonic_voices()
649
-
650
- with gr.Blocks(title="Generate Speech") as demo:
651
- gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Generate Speech</h1>")
652
- gr.Markdown("<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>")
653
-
654
- with gr.Row():
655
- with gr.Column():
656
- text_input = gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4)
657
- model_dropdown = gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic")
658
-
659
- # Voice dropdown needs to update based on model
660
- voice_dropdown = gr.Dropdown(
661
- label="Voice",
662
- choices=supertonic_voices,
663
- value="F1",
664
- info="Select voice"
665
- )
666
-
667
- speed_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed")
668
-
669
- # Supertonic specific
670
- with gr.Group() as supertonic_params:
671
- steps_slider = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps", info="Supertonic only")
672
- silence_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration", info="Supertonic only")
673
- chunk_slider = gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size", info="Supertonic only")
674
-
675
- with gr.Row():
676
- clear_btn = gr.Button("Clear")
677
- gen_btn = gr.Button("Generate", variant="primary")
678
-
679
- with gr.Column():
680
- audio_output = gr.Audio(label="Audio", type="filepath", format="wav")
681
-
682
- def update_voices(model_name):
683
- if model_name == "Kokoro":
684
- return {
685
- voice_dropdown: gr.Dropdown(choices=kokoro_voices, value="af_heart"),
686
- supertonic_params: gr.Group(visible=False)
687
- }
688
- else:
689
- return {
690
- voice_dropdown: gr.Dropdown(choices=supertonic_voices, value="F1"),
691
- supertonic_params: gr.Group(visible=True)
692
- }
693
-
694
- def clear_inputs():
695
- return [
696
- "", # text_input
697
- "Supertonic", # model_dropdown
698
- "F1", # voice_dropdown
699
- 1.3, # speed_slider
700
- 5, # steps_slider
701
- 0.3, # silence_slider
702
- 300, # chunk_slider
703
- None # audio_output
704
- ]
705
-
706
- clear_btn.click(
707
- fn=clear_inputs,
708
- inputs=[],
709
- outputs=[text_input, model_dropdown, voice_dropdown, speed_slider, steps_slider, silence_slider, chunk_slider, audio_output]
710
- )
711
-
712
- model_dropdown.change(
713
- fn=update_voices,
714
- inputs=[model_dropdown],
715
- outputs=[voice_dropdown, supertonic_params]
716
- )
717
-
718
- gen_btn.click(
719
- fn=Generate_Speech,
720
- inputs=[text_input, model_dropdown, speed_slider, voice_dropdown, steps_slider, silence_slider, chunk_slider],
721
- outputs=[audio_output]
722
- )
723
-
724
- # Expose the function for API
725
- demo.fn = Generate_Speech
726
-
727
- return demo
728
 
729
 
730
  __all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]
 
643
  return output_path
644
 
645
 
646
def build_interface() -> gr.Interface:
    """Build the Gradio interface that wraps ``Generate_Speech``.

    The voice dropdown offers the de-duplicated, sorted union of the Kokoro
    and Supertonic voice lists, so a single static form covers both models.

    Returns:
        A ``gr.Interface`` whose inputs are, in order: text, model, speed,
        voice, steps, silence duration and max chunk size, and whose output
        is an audio file path rendered as WAV.
    """
    kokoro_voices = get_kokoro_voices()
    supertonic_voices = get_supertonic_voices()
    # set() drops voices present in both lists; sorted() already returns a
    # list, so no intermediate list() is needed.
    all_voices = sorted(set(kokoro_voices + supertonic_voices))

    return gr.Interface(
        fn=Generate_Speech,
        # NOTE(review): Gradio passes these to fn positionally in this order;
        # confirm it matches the Generate_Speech signature.
        inputs=[
            gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
            gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
            gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
            gr.Dropdown(
                label="Voice",
                choices=all_voices,
                value="F1",
                info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
            ),
            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps (Supertonic only)"),
            gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration (Supertonic only)"),
            gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size (Supertonic only)"),
        ],
        outputs=gr.Audio(label="Audio", type="filepath", format="wav"),
        title="Generate Speech",
        description=(
            "<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
 
676
 
677
  __all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]