Spaces:

NeuralFalcon
/

Pocket-TTS

Running

App Files Community

NeuralFalcon committed 10 days ago

Commit

9ab845a

verified ·

1 Parent(s): 2dfd0aa

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -32

app.py CHANGED Viewed

@@ -1,3 +1,9 @@
 import gradio as gr
 import os
 import re
@@ -6,13 +12,6 @@ import scipy.io.wavfile
 import torch
 from pocket_tts import TTSModel
-#for voice clone
-from huggingface_hub import login
-hf_token = os.getenv("HF_TOKEN")
-if hf_token:
-    login(token=hf_token)
 print("Loading TTS Model...")
 try:
     tts_model = TTSModel.load_model()
@@ -36,7 +35,7 @@ def get_tts_file_name(text, language="en"):
     )
 DEFAULT_VOICES = [
-    "alba", "marius", "javert", "jean",
     "fantine", "cosette", "eponine", "azelma"
 ]
@@ -45,15 +44,15 @@ def generate_speech(text, mode, preset_voice, clone_audio_path):
         raise gr.Error("Please enter text to generate speech.")
     state = None
     if mode == "Default Voices":
         print(f"Using preset voice: {preset_voice}")
         state = tts_model.get_state_for_audio_prompt(preset_voice)
     else:
         if not clone_audio_path:
             raise gr.Error("Please upload a reference audio file for cloning.")
         print(f"Cloning voice from: {clone_audio_path}")
         try:
             state = tts_model.get_state_for_audio_prompt(clone_audio_path)
@@ -64,10 +63,10 @@ def generate_speech(text, mode, preset_voice, clone_audio_path):
     try:
         audio_tensor = tts_model.generate_audio(state, text)
         output_filename = get_tts_file_name(text)
         scipy.io.wavfile.write(output_filename, tts_model.sample_rate, audio_tensor.numpy())
         return output_filename
     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")
@@ -80,8 +79,8 @@ def toggle_inputs(mode):
 CUSTOM_CSS = """
-.gradio-container {
-    font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif;
 }
 .header-container {
     text-align: center;
@@ -98,8 +97,8 @@ CUSTOM_CSS = """
     opacity: 0.9;
 }
 .links-container a {
-    text-decoration: none;
-    color: #4a90e2;
     font-weight: 500;
 }
 .links-container a:hover {
@@ -165,22 +164,22 @@ HEADER_HTML = """
 with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
     gr.HTML(HEADER_HTML)
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(
-                label="Text Input",
-                placeholder="Hi, how are you?",
                 lines=3,
                 value="Hi, how are you?"
             )
             mode_radio = gr.Radio(
                 choices=["Default Voices", "Voice Clone"],
                 value="Default Voices",
                 label="TTS Mode"
             )
             with gr.Group():
                 dropdown_input = gr.Dropdown(
                     choices=DEFAULT_VOICES,
@@ -188,18 +187,22 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
                     label="Select Voice",
                     visible=True
                 )
                 audio_upload = gr.Audio(
                     label="Upload Reference Audio (WAV recommended)",
                     type="filepath",
                     visible=False
                 )
             generate_btn = gr.Button("Generate Audio", variant="primary")
             example_audio_url = "https://huggingface.co/kyutai/tts-voices/resolve/main/alba-mackenna/casual.wav"
-            gr.Examples(
                 examples=[
                     ["Hello, I am Fantine. Nice to meet you.", "Default Voices", "fantine", None],
                     ["I am Cosette, and the weather is lovely.", "Default Voices", "cosette", None],
@@ -210,17 +213,12 @@ with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
                 inputs=[text_input, mode_radio, dropdown_input, audio_upload],
                 label="Click on an Example to Try"
             )
-        with gr.Column():
-            output_audio = gr.Audio(label="Generated Speech", type="filepath")
     mode_radio.change(
         fn=toggle_inputs,
         inputs=[mode_radio],
         outputs=[dropdown_input, audio_upload]
     )
     generate_btn.click(
         fn=generate_speech,
         inputs=[text_input, mode_radio, dropdown_input, audio_upload],

+#for voice clone
+from huggingface_hub import login
+hf_token = os.getenv("HF_TOKEN")
+if hf_token:
+    login(token=hf_token)
 import gradio as gr
 import os
 import re
 import torch
 from pocket_tts import TTSModel
 print("Loading TTS Model...")
 try:
     tts_model = TTSModel.load_model()
     )
 DEFAULT_VOICES = [
+    "alba", "marius", "javert", "jean",
     "fantine", "cosette", "eponine", "azelma"
 ]
         raise gr.Error("Please enter text to generate speech.")
     state = None
     if mode == "Default Voices":
         print(f"Using preset voice: {preset_voice}")
         state = tts_model.get_state_for_audio_prompt(preset_voice)
     else:
         if not clone_audio_path:
             raise gr.Error("Please upload a reference audio file for cloning.")
         print(f"Cloning voice from: {clone_audio_path}")
         try:
             state = tts_model.get_state_for_audio_prompt(clone_audio_path)
     try:
         audio_tensor = tts_model.generate_audio(state, text)
         output_filename = get_tts_file_name(text)
         scipy.io.wavfile.write(output_filename, tts_model.sample_rate, audio_tensor.numpy())
         return output_filename
     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")
 CUSTOM_CSS = """
+.gradio-container {
+    font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif;
 }
 .header-container {
     text-align: center;
     opacity: 0.9;
 }
 .links-container a {
+    text-decoration: none;
+    color: #4a90e2;
     font-weight: 500;
 }
 .links-container a:hover {
 with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=CUSTOM_CSS) as demo:
     gr.HTML(HEADER_HTML)
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(
+                label="Text Input",
+                placeholder="Hi, how are you?",
                 lines=3,
                 value="Hi, how are you?"
             )
             mode_radio = gr.Radio(
                 choices=["Default Voices", "Voice Clone"],
                 value="Default Voices",
                 label="TTS Mode"
             )
             with gr.Group():
                 dropdown_input = gr.Dropdown(
                     choices=DEFAULT_VOICES,
                     label="Select Voice",
                     visible=True
                 )
                 audio_upload = gr.Audio(
                     label="Upload Reference Audio (WAV recommended)",
                     type="filepath",
                     visible=False
                 )
             generate_btn = gr.Button("Generate Audio", variant="primary")
             example_audio_url = "https://huggingface.co/kyutai/tts-voices/resolve/main/alba-mackenna/casual.wav"
+        with gr.Column():
+            output_audio = gr.Audio(label="Generated Speech", type="filepath")
+    gr.Examples(
                 examples=[
                     ["Hello, I am Fantine. Nice to meet you.", "Default Voices", "fantine", None],
                     ["I am Cosette, and the weather is lovely.", "Default Voices", "cosette", None],
                 inputs=[text_input, mode_radio, dropdown_input, audio_upload],
                 label="Click on an Example to Try"
             )
     mode_radio.change(
         fn=toggle_inputs,
         inputs=[mode_radio],
         outputs=[dropdown_input, audio_upload]
     )
     generate_btn.click(
         fn=generate_speech,
         inputs=[text_input, mode_radio, dropdown_input, audio_upload],