damla921 committed on
Commit
61aea24
·
verified ·
1 Parent(s): c9ff56e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -15
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import time, os, shutil, subprocess, tempfile
 
2
  import gradio as gr
3
- import torchaudio
4
  import torch
5
  from speechbrain.inference.TTS import Tacotron2
6
  from speechbrain.inference.vocoders import HIFIGAN
@@ -31,6 +32,11 @@ def _ensure_mel_shape(mel):
31
  def _have_ffmpeg():
32
  return shutil.which("ffmpeg") is not None
33
 
 
 
 
 
 
34
  def tts_luganda(text):
35
  text = (text or "").strip()
36
  if not text:
@@ -38,14 +44,15 @@ def tts_luganda(text):
38
 
39
  # Synthesize
40
  mel = _ensure_mel_shape(taco.encode_text(text))
41
- wav = vocoder.decode_batch(mel)[0].squeeze(0).cpu()
42
 
43
  # Save a temporary WAV
44
  ts = int(time.time())
45
- base = "luganda_tts_" + str(ts)
46
  wav_path = os.path.join(tempfile.gettempdir(), base + ".wav")
47
- torchaudio.save(wav_path, wav.unsqueeze(0), SAMPLE_RATE)
48
 
 
49
  mp3_path = None
50
  if _have_ffmpeg():
51
  mp3_path = os.path.join(tempfile.gettempdir(), base + ".mp3")
@@ -65,22 +72,17 @@ def tts_luganda(text):
65
  else:
66
  status += " (WAV ready)"
67
 
68
- # Return both: Gradio will show player + download button
69
- # First output is WAV for universal playback; second is MP3 if available
70
  return wav_path, (mp3_path if mp3_path else None), status
71
 
72
  with gr.Blocks(title="Luganda TTS") as demo:
73
  gr.Markdown("# 🌍 Luganda Text-to-Speech\nType Luganda, click **Generate**, and listen/download the audio.")
74
- with gr.Row():
75
- text = gr.Textbox(label="Luganda text", lines=6, value="Ngenda mu kibuga Kampala olunaku lwa leero.")
76
- with gr.Row():
77
- btn = gr.Button("Generate", variant="primary")
78
- with gr.Row():
79
- out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
80
- out_mp3 = gr.File(label="Download MP3", interactive=False)
81
  status = gr.Markdown("Ready.")
82
 
83
  btn.click(fn=tts_luganda, inputs=text, outputs=[out_wav, out_mp3, status])
84
 
85
- # Make public queue robust when multiple users test at once
86
- demo.queue(concurrency_count=1, max_size=8).launch()
 
1
  import time, os, shutil, subprocess, tempfile
2
+ import numpy as np
3
  import gradio as gr
4
+ import soundfile as sf
5
  import torch
6
  from speechbrain.inference.TTS import Tacotron2
7
  from speechbrain.inference.vocoders import HIFIGAN
 
32
  def _have_ffmpeg():
33
  return shutil.which("ffmpeg") is not None
34
 
35
+ def _save_wav_np(path, wav_tensor):
36
+ """Save float32 mono [-1,1] to WAV using soundfile (no torchaudio backend needed)."""
37
+ x = wav_tensor.detach().cpu().numpy().astype(np.float32)
38
+ sf.write(path, x, SAMPLE_RATE, subtype="PCM_16")
39
+
40
  def tts_luganda(text):
41
  text = (text or "").strip()
42
  if not text:
 
44
 
45
  # Synthesize
46
  mel = _ensure_mel_shape(taco.encode_text(text))
47
+ wav = vocoder.decode_batch(mel)[0].squeeze(0) # 1D torch tensor
48
 
49
  # Save a temporary WAV
50
  ts = int(time.time())
51
+ base = f"luganda_tts_{ts}"
52
  wav_path = os.path.join(tempfile.gettempdir(), base + ".wav")
53
+ _save_wav_np(wav_path, wav)
54
 
55
+ # Optional MP3 via ffmpeg
56
  mp3_path = None
57
  if _have_ffmpeg():
58
  mp3_path = os.path.join(tempfile.gettempdir(), base + ".mp3")
 
72
  else:
73
  status += " (WAV ready)"
74
 
 
 
75
  return wav_path, (mp3_path if mp3_path else None), status
76
 
77
  with gr.Blocks(title="Luganda TTS") as demo:
78
  gr.Markdown("# 🌍 Luganda Text-to-Speech\nType Luganda, click **Generate**, and listen/download the audio.")
79
+ text = gr.Textbox(label="Luganda text", lines=6, value="Ngenda mu kibuga Kampala olunaku lwa leero.")
80
+ btn = gr.Button("Generate", variant="primary")
81
+ out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
82
+ out_mp3 = gr.File(label="Download MP3", interactive=False)
 
 
 
83
  status = gr.Markdown("Ready.")
84
 
85
  btn.click(fn=tts_luganda, inputs=text, outputs=[out_wav, out_mp3, status])
86
 
87
+ # Just enable queue with defaults (no unsupported args)
88
+ demo.queue().launch()