Update Modules/Generate_Speech.py
Browse files- Modules/Generate_Speech.py +27 -80
Modules/Generate_Speech.py
CHANGED
|
@@ -643,88 +643,35 @@ def _generate_supertonic(text: str, speed: float, voice: str, steps: int, silenc
|
|
| 643 |
return output_path
|
| 644 |
|
| 645 |
|
| 646 |
-
def build_interface() -> gr.
|
| 647 |
kokoro_voices = get_kokoro_voices()
|
| 648 |
supertonic_voices = get_supertonic_voices()
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
with gr.Row():
|
| 676 |
-
clear_btn = gr.Button("Clear")
|
| 677 |
-
gen_btn = gr.Button("Generate", variant="primary")
|
| 678 |
-
|
| 679 |
-
with gr.Column():
|
| 680 |
-
audio_output = gr.Audio(label="Audio", type="filepath", format="wav")
|
| 681 |
-
|
| 682 |
-
def update_voices(model_name):
|
| 683 |
-
if model_name == "Kokoro":
|
| 684 |
-
return {
|
| 685 |
-
voice_dropdown: gr.Dropdown(choices=kokoro_voices, value="af_heart"),
|
| 686 |
-
supertonic_params: gr.Group(visible=False)
|
| 687 |
-
}
|
| 688 |
-
else:
|
| 689 |
-
return {
|
| 690 |
-
voice_dropdown: gr.Dropdown(choices=supertonic_voices, value="F1"),
|
| 691 |
-
supertonic_params: gr.Group(visible=True)
|
| 692 |
-
}
|
| 693 |
-
|
| 694 |
-
def clear_inputs():
|
| 695 |
-
return [
|
| 696 |
-
"", # text_input
|
| 697 |
-
"Supertonic", # model_dropdown
|
| 698 |
-
"F1", # voice_dropdown
|
| 699 |
-
1.3, # speed_slider
|
| 700 |
-
5, # steps_slider
|
| 701 |
-
0.3, # silence_slider
|
| 702 |
-
300, # chunk_slider
|
| 703 |
-
None # audio_output
|
| 704 |
-
]
|
| 705 |
-
|
| 706 |
-
clear_btn.click(
|
| 707 |
-
fn=clear_inputs,
|
| 708 |
-
inputs=[],
|
| 709 |
-
outputs=[text_input, model_dropdown, voice_dropdown, speed_slider, steps_slider, silence_slider, chunk_slider, audio_output]
|
| 710 |
-
)
|
| 711 |
-
|
| 712 |
-
model_dropdown.change(
|
| 713 |
-
fn=update_voices,
|
| 714 |
-
inputs=[model_dropdown],
|
| 715 |
-
outputs=[voice_dropdown, supertonic_params]
|
| 716 |
-
)
|
| 717 |
-
|
| 718 |
-
gen_btn.click(
|
| 719 |
-
fn=Generate_Speech,
|
| 720 |
-
inputs=[text_input, model_dropdown, speed_slider, voice_dropdown, steps_slider, silence_slider, chunk_slider],
|
| 721 |
-
outputs=[audio_output]
|
| 722 |
-
)
|
| 723 |
-
|
| 724 |
-
# Expose the function for API
|
| 725 |
-
demo.fn = Generate_Speech
|
| 726 |
-
|
| 727 |
-
return demo
|
| 728 |
|
| 729 |
|
| 730 |
# Public API of this module: the names exported by a star-import.
__all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]
|
|
|
|
| 643 |
return output_path
|
| 644 |
|
| 645 |
|
| 646 |
+
def build_interface() -> gr.Interface:
    """Build the Gradio interface that wraps ``Generate_Speech``.

    The voice dropdown offers the de-duplicated, sorted union of the names
    returned by ``get_kokoro_voices()`` and ``get_supertonic_voices()``, so a
    single static form serves both models.

    Returns:
        A ``gr.Interface`` whose inputs are, in order: text, model, speed,
        voice, steps, silence duration and max chunk size, and whose single
        output is an audio component configured for WAV file paths.
    """
    kokoro_voices = get_kokoro_voices()
    supertonic_voices = get_supertonic_voices()
    # A set union removes names shared by both models and accepts any
    # iterable; sorted() already returns a list, so no extra list() is needed.
    all_voices = sorted({*kokoro_voices, *supertonic_voices})

    return gr.Interface(
        fn=Generate_Speech,
        # Component order here is the positional argument order given to fn.
        inputs=[
            gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
            gr.Dropdown(label="Model", choices=["Supertonic", "Kokoro"], value="Supertonic"),
            gr.Slider(minimum=0.5, maximum=2.0, value=1.3, step=0.1, label="Speed"),
            gr.Dropdown(
                label="Voice",
                choices=all_voices,
                value="F1",
                info="Select voice (F1/F2/M1/M2 for Supertonic, others for Kokoro)",
            ),
            gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Steps (Supertonic only)"),
            gr.Slider(minimum=0.0, maximum=2.0, value=0.3, step=0.1, label="Silence Duration (Supertonic only)"),
            gr.Slider(minimum=50, maximum=1000, value=300, step=10, label="Max Chunk Size (Supertonic only)"),
        ],
        outputs=gr.Audio(label="Audio", type="filepath", format="wav"),
        title="Generate Speech",
        description=(
            "<div style=\"text-align:center\">Generate speech with Supertonic-66M or Kokoro-82M. Runs on CPU.</div>"
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
|
| 676 |
|
| 677 |
# Public API of this module: the names exported by a star-import.
__all__ = ["Generate_Speech", "List_Kokoro_Voices", "List_Supertonic_Voices", "build_interface"]
|