um41r committed on
Commit
b37d45e
·
verified ·
1 Parent(s): 286a66f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -38
app.py CHANGED
@@ -1,73 +1,68 @@
1
  import gradio as gr
2
- import torch
3
- import io
4
- import scipy.io.wavfile as wavfile
5
  import numpy as np
6
- from pocket_tts import TTSModel
7
-
8
- # Hardcoded voices from error message (no auth needed)
9
- AVAILABLE_VOICES = ['alba', 'marius', 'javert', 'jean', 'fantine', 'cosette', 'eponine', 'azelma']
10
 
11
- # Load model once at startup
12
- tts_model = TTSModel.from_pretrained("kyutai/pocket-tts")
13
- sample_rate = tts_model.sample_rate
14
 
15
- def generate_speech(text, voice_name):
16
- """Generate speech using built-in catalog voices."""
17
  try:
18
- # Get voice state using catalog voice name
19
- voice_state = tts_model.get_state(voice_name)
20
-
21
- # Generate audio
22
- audio = tts_model.generate_audio(voice_state, text)
 
23
 
24
  # Convert to WAV bytes for Gradio
25
- audio_np = audio.cpu().numpy().astype(np.float32)
26
  buffer = io.BytesIO()
27
- wavfile.write(buffer, sample_rate, audio_np)
28
  buffer.seek(0)
29
 
30
- return buffer.read(), f"✅ Generated with '{voice_name}' ({len(text)} chars, {sample_rate}Hz)"
31
  except Exception as e:
32
  return None, f"❌ Error: {str(e)}"
33
 
34
  # Gradio interface
35
- with gr.Blocks(title="Pocket TTS - CPU TTS Demo") as demo:
36
- gr.Markdown("# ⚡ Pocket TTS Demo\nFast CPU text-to-speech with 8 built-in voices.")
37
 
38
  with gr.Row():
39
  with gr.Column(scale=2):
40
  text_input = gr.Textbox(
41
  label="Text to speak",
42
- placeholder="Enter your text...",
43
  lines=3,
44
- value="Hello! This is Pocket TTS running on Hugging Face Spaces CPU."
45
  )
46
- voice_dropdown = gr.Dropdown(
47
- choices=AVAILABLE_VOICES,
48
- label="Voice",
49
- value="alba"
50
  )
51
- generate_btn = gr.Button("🎤 Generate", variant="primary")
52
 
53
  with gr.Column(scale=3):
54
- audio_output = gr.Audio(label="Audio", type="filepath")
55
  status_output = gr.Textbox(label="Status", interactive=False)
56
 
57
  generate_btn.click(
58
  fn=generate_speech,
59
- inputs=[text_input, voice_dropdown],
60
  outputs=[audio_output, status_output]
61
  )
62
 
63
  gr.Examples(
64
- examples=[
65
- ["The quick brown fox jumps over the lazy dog.", "alba"],
66
- ["Testing different voices with Pocket TTS.", "fantine"],
67
- ["CPU-powered text-to-speech demo!", "marius"]
68
- ],
69
- inputs=[text_input, voice_dropdown]
70
  )
 
 
71
 
72
  if __name__ == "__main__":
73
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
 
 
 
2
  import numpy as np
3
+ from pocket_tts_onnx import PocketTTSOnnx
4
+ import io
5
+ import soundfile as sf
6
+ from pathlib import Path
7
 
8
# Initialize the ONNX model once at import time (downloads automatically) so
# that every request handled by generate_speech reuses the same instance.
tts = PocketTTSOnnx()
 
10
 
11
def generate_speech(text, voice_sample_path=None):
    """Synthesize ``text`` to WAV audio, optionally cloning an uploaded voice.

    Args:
        text: The text to speak. Empty or whitespace-only input is rejected
            without calling the model.
        voice_sample_path: Path to a reference audio file whose voice should
            be cloned. When falsy (``None`` or ``""``), the bundled
            ``reference_sample.wav`` is used instead.

    Returns:
        A ``(wav_bytes, status)`` tuple. On success ``wav_bytes`` holds the
        WAV-encoded audio and ``status`` is a human-readable summary. On any
        failure ``wav_bytes`` is ``None`` and ``status`` carries the error
        message, so the UI can display the problem instead of crashing.
    """
    try:
        # Fail fast on blank input rather than handing the model nothing to say.
        if not text or not text.strip():
            return None, "❌ Error: Please enter some text to speak."

        # Fall back to the bundled reference sample when nothing was uploaded.
        voice = voice_sample_path or "reference_sample.wav"
        audio = tts.generate(text=text, voice=voice)

        # Encode to WAV in memory for Gradio. The format is passed explicitly
        # because an in-memory buffer has no file extension to infer it from.
        with io.BytesIO() as buffer:
            sf.write(buffer, audio, tts.sample_rate, format='WAV')
            wav_bytes = buffer.getvalue()

        return wav_bytes, f"✅ Generated ({len(text)} chars, {tts.sample_rate}Hz)"
    except Exception as e:
        # Top-level UI boundary: surface the failure in the status box.
        return None, f"❌ Error: {e}"
29
 
30
  # Gradio interface
31
+ with gr.Blocks(title="Pocket TTS ONNX Demo") as demo:
32
+ gr.Markdown("# ⚡ Pocket TTS ONNX - Voice Cloning\n100M TTS model running on CPU with ONNX optimization.")
33
 
34
  with gr.Row():
35
  with gr.Column(scale=2):
36
  text_input = gr.Textbox(
37
  label="Text to speak",
38
+ placeholder="Enter text...",
39
  lines=3,
40
+ value="Hello! This is Pocket TTS ONNX running perfectly on Hugging Face Spaces."
41
  )
42
+ voice_upload = gr.Audio(
43
+ sources=["upload"],
44
+ type="filepath",
45
+ label="Voice sample (WAV) for cloning"
46
  )
47
+ generate_btn = gr.Button("🎤 Generate Speech", variant="primary")
48
 
49
  with gr.Column(scale=3):
50
+ audio_output = gr.Audio(label="Generated Audio", type="filepath")
51
  status_output = gr.Textbox(label="Status", interactive=False)
52
 
53
  generate_btn.click(
54
  fn=generate_speech,
55
+ inputs=[text_input, voice_upload],
56
  outputs=[audio_output, status_output]
57
  )
58
 
59
  gr.Examples(
60
+ examples=[["Test voice cloning with uploaded audio.", None]],
61
+ inputs=[text_input, voice_upload],
62
+ fn=generate_speech
 
 
 
63
  )
64
+
65
+ gr.Markdown("**Note:** Upload a clean WAV voice sample (3-10s) for best cloning results.")
66
 
67
if __name__ == "__main__":
    # Listen on all network interfaces (0.0.0.0) rather than only localhost.
    demo.launch(server_name="0.0.0.0")