Spaces:

um41r
/

pocket-tts

Build error

App Files Community

um41r committed on Jan 21

Commit

b37d45e

verified ·

1 Parent(s): 286a66f

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -38

app.py CHANGED Viewed

@@ -1,73 +1,68 @@
 import gradio as gr
-import torch
-import io
-import scipy.io.wavfile as wavfile
 import numpy as np
-from pocket_tts import TTSModel
-# Hardcoded voices from error message (no auth needed)
-AVAILABLE_VOICES = ['alba', 'marius', 'javert', 'jean', 'fantine', 'cosette', 'eponine', 'azelma']
-# Load model once at startup
-tts_model = TTSModel.from_pretrained("kyutai/pocket-tts")
-sample_rate = tts_model.sample_rate
-def generate_speech(text, voice_name):
-    """Generate speech using built-in catalog voices."""
     try:
-        # Get voice state using catalog voice name
-        voice_state = tts_model.get_state(voice_name)
-        # Generate audio
-        audio = tts_model.generate_audio(voice_state, text)
         # Convert to WAV bytes for Gradio
-        audio_np = audio.cpu().numpy().astype(np.float32)
         buffer = io.BytesIO()
-        wavfile.write(buffer, sample_rate, audio_np)
         buffer.seek(0)
-        return buffer.read(), f"✅ Generated with '{voice_name}' ({len(text)} chars, {sample_rate}Hz)"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 # Gradio interface
-with gr.Blocks(title="Pocket TTS - CPU TTS Demo") as demo:
-    gr.Markdown("# ⚡ Pocket TTS Demo\nFast CPU text-to-speech with 8 built-in voices.")
     with gr.Row():
         with gr.Column(scale=2):
             text_input = gr.Textbox(
                 label="Text to speak",
-                placeholder="Enter your text...",
                 lines=3,
-                value="Hello! This is Pocket TTS running on Hugging Face Spaces CPU."
             )
-            voice_dropdown = gr.Dropdown(
-                choices=AVAILABLE_VOICES,
-                label="Voice",
-                value="alba"
             )
-            generate_btn = gr.Button("🎤 Generate", variant="primary")
         with gr.Column(scale=3):
-            audio_output = gr.Audio(label="Audio", type="filepath")
             status_output = gr.Textbox(label="Status", interactive=False)
     generate_btn.click(
         fn=generate_speech,
-        inputs=[text_input, voice_dropdown],
         outputs=[audio_output, status_output]
     )
     gr.Examples(
-        examples=[
-            ["The quick brown fox jumps over the lazy dog.", "alba"],
-            ["Testing different voices with Pocket TTS.", "fantine"],
-            ["CPU-powered text-to-speech demo!", "marius"]
-        ],
-        inputs=[text_input, voice_dropdown]
     )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import numpy as np
+from pocket_tts_onnx import PocketTTSOnnx
+import io
+import soundfile as sf
+from pathlib import Path
+# Initialize ONNX model (downloads automatically)
+tts = PocketTTSOnnx()
+def generate_speech(text, voice_sample_path=None):
+    """Generate speech with voice cloning or default voice."""
     try:
+        # Use reference audio or default
+        if voice_sample_path:
+            audio = tts.generate(text=text, voice=voice_sample_path)
+        else:
+            # Use bundled reference sample
+            audio = tts.generate(text=text, voice="reference_sample.wav")
         # Convert to WAV bytes for Gradio
         buffer = io.BytesIO()
+        sf.write(buffer, audio, tts.sample_rate, format='WAV')
         buffer.seek(0)
+        return buffer.read(), f"✅ Generated ({len(text)} chars, {tts.sample_rate}Hz)"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 # Gradio interface
+with gr.Blocks(title="Pocket TTS ONNX Demo") as demo:
+    gr.Markdown("# ⚡ Pocket TTS ONNX - Voice Cloning\n100M TTS model running on CPU with ONNX optimization.")
     with gr.Row():
         with gr.Column(scale=2):
             text_input = gr.Textbox(
                 label="Text to speak",
+                placeholder="Enter text...",
                 lines=3,
+                value="Hello! This is Pocket TTS ONNX running perfectly on Hugging Face Spaces."
             )
+            voice_upload = gr.Audio(
+                sources=["upload"],
+                type="filepath",
+                label="Voice sample (WAV) for cloning"
             )
+            generate_btn = gr.Button("🎤 Generate Speech", variant="primary")
         with gr.Column(scale=3):
+            audio_output = gr.Audio(label="Generated Audio", type="filepath")
             status_output = gr.Textbox(label="Status", interactive=False)
     generate_btn.click(
         fn=generate_speech,
+        inputs=[text_input, voice_upload],
         outputs=[audio_output, status_output]
     )
     gr.Examples(
+        examples=[["Test voice cloning with uploaded audio.", None]],
+        inputs=[text_input, voice_upload],
+        fn=generate_speech
     )
+    gr.Markdown("**Note:** Upload a clean WAV voice sample (3-10s) for best cloning results.")
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0")