Borio047 committed on
Commit
5e4386d
·
verified ·
1 Parent(s): 8663e56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -47
app.py CHANGED
@@ -1,68 +1,72 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import pipeline
 
 
 
 
4
 
5
- # 1. Choose a TTS model from Hugging Face
6
- # This model is for English TTS. You can later swap it for another.
7
- TTS_MODEL_ID = "facebook/mms-tts-eng"
8
 
9
- # 2. Create the TTS pipeline
10
- device = 0 if torch.cuda.is_available() else -1
11
- print(f"Using device: {'cuda' if device == 0 else 'cpu'}")
12
-
13
- try:
14
- tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
15
- except Exception as e:
16
- # If the model can't be loaded, fail early with a clear message
17
- raise RuntimeError(f"Failed to load TTS pipeline: {e}")
18
-
19
-
20
- def synthesize_tts(text: str):
21
  """
22
- Take text and return (sampling_rate, audio_numpy) for Gradio Audio output.
 
23
  """
24
  if not text or text.strip() == "":
25
- raise gr.Error("Please enter some text to synthesize.")
26
 
27
- try:
28
- out = tts(text)
29
- except Exception as e:
30
- # Show any HF pipeline error nicely in the UI
31
- raise gr.Error(f"TTS pipeline error: {e}")
32
 
33
- # Expecting a dict with 'audio' (numpy array) and 'sampling_rate' (int)
34
- if not isinstance(out, dict) or "audio" not in out or "sampling_rate" not in out:
35
- raise gr.Error(f"Unexpected TTS output format: {out}")
36
 
37
- audio = out["audio"]
38
- sr = out["sampling_rate"]
 
39
 
40
- return (sr, audio)
 
 
 
 
41
 
 
 
42
 
43
- title = "Simple Text-to-Speech (TTS) Space"
44
- description = (
45
- "Enter some English text and generate speech using a Hugging Face TTS model. "
46
- "Once this works, we can upgrade it to voice cloning (F5-TTS style)."
47
- )
48
 
49
  with gr.Blocks() as demo:
50
- gr.Markdown(f"# {title}")
51
- gr.Markdown(description)
 
 
 
52
 
53
  with gr.Row():
54
- with gr.Column():
55
- text_in = gr.Textbox(
 
 
56
  lines=4,
57
- label="Text to synthesize",
58
- placeholder="Type some English text here..."
59
  )
60
- btn = gr.Button("Generate Speech")
61
- with gr.Column():
62
- # type='numpy' means we can return (sr, numpy_array)
63
- audio_out = gr.Audio(label="Generated audio", type="numpy")
 
 
64
 
65
- btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)
 
 
 
 
66
 
67
- # On Spaces it's fine to launch unconditionally; disable SSR to avoid async quirks
68
- demo.launch(ssr_mode=False)
 
 
1
  import gradio as gr
 
2
  from transformers import pipeline
3
+ import numpy as np
4
+ import soundfile as sf
5
+ import os
6
+ import uuid
7
 
8
+ # Load TTS pipeline once at startup
9
+ TTS_MODEL_ID = "suno/bark-small"
10
+ tts = pipeline("text-to-speech", model=TTS_MODEL_ID)
11
 
12
+ def generate_speech(text: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
13
  """
14
+ Takes input text and returns a filepath to a WAV file
15
+ for gr.Audio(type="filepath").
16
  """
17
  if not text or text.strip() == "":
18
+ raise gr.Error("Please enter some text to synthesize 🙂")
19
 
20
+ # Run the model
21
+ output = tts(text)
 
 
 
22
 
23
+ # Expecting {"audio": np.ndarray or list, "sampling_rate": int}
24
+ audio = np.asarray(output["audio"], dtype=np.float32)
25
+ sr = int(output["sampling_rate"])
26
 
27
+ # Squeeze out singleton dimensions (e.g. shape (1, N) -> (N,)) before writing with soundfile
28
+ if audio.ndim > 1:
29
+ audio = audio.squeeze()
30
 
31
+ # Create a unique temporary path
32
+ tmp_dir = "/tmp"
33
+ os.makedirs(tmp_dir, exist_ok=True)
34
+ filename = f"tts_{uuid.uuid4().hex}.wav"
35
+ filepath = os.path.join(tmp_dir, filename)
36
 
37
+ # Write WAV using soundfile (no pydub, no wave header issues)
38
+ sf.write(filepath, audio, sr)
39
 
40
+ # Return the path; gr.Audio(type="filepath") will use it directly
41
+ return filepath
 
 
 
42
 
43
  with gr.Blocks() as demo:
44
+ gr.Markdown("# 🗣️ Simple Text-to-Speech Demo (Bark Small)")
45
+ gr.Markdown(
46
+ "Type some English text, click **Generate speech**, and listen to the audio.\n"
47
+ "Model: `suno/bark-small` via 🤗 Transformers TTS pipeline."
48
+ )
49
 
50
  with gr.Row():
51
+ with gr.Column(scale=2):
52
+ text_input = gr.Textbox(
53
+ label="Input text",
54
+ placeholder="Type something like: Hello, this is my first TTS Space!",
55
  lines=4,
 
 
56
  )
57
+ generate_button = gr.Button("Generate speech", variant="primary")
58
+ with gr.Column(scale=1):
59
+ audio_output = gr.Audio(
60
+ label="Generated audio",
61
+ type="filepath", # we are returning a path string
62
+ )
63
 
64
+ generate_button.click(
65
+ fn=generate_speech,
66
+ inputs=text_input,
67
+ outputs=audio_output,
68
+ )
69
 
70
+ if __name__ == "__main__":
71
+ # Disable SSR to avoid async quirks
72
+ demo.launch(ssr_mode=False)