Spaces:

damla921
/

Luganda_TTS

Sleeping

App Files Files Community

damla921 committed on Oct 8, 2025

Commit

61aea24

verified ·

1 Parent(s): c9ff56e

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import time, os, shutil, subprocess, tempfile
 import gradio as gr
-import torchaudio
 import torch
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
@@ -31,6 +32,11 @@ def _ensure_mel_shape(mel):
 def _have_ffmpeg():
     return shutil.which("ffmpeg") is not None
 def tts_luganda(text):
     text = (text or "").strip()
     if not text:
@@ -38,14 +44,15 @@ def tts_luganda(text):
     # Synthesize
     mel = _ensure_mel_shape(taco.encode_text(text))
-    wav = vocoder.decode_batch(mel)[0].squeeze(0).cpu()
     # Save a temporary WAV
     ts = int(time.time())
-    base = "luganda_tts_" + str(ts)
     wav_path = os.path.join(tempfile.gettempdir(), base + ".wav")
-    torchaudio.save(wav_path, wav.unsqueeze(0), SAMPLE_RATE)
     mp3_path = None
     if _have_ffmpeg():
         mp3_path = os.path.join(tempfile.gettempdir(), base + ".mp3")
@@ -65,22 +72,17 @@ def tts_luganda(text):
     else:
         status += " (WAV ready)"
-    # Return both: Gradio will show player + download button
-    # First output is WAV for universal playback; second is MP3 if available
     return wav_path, (mp3_path if mp3_path else None), status
 with gr.Blocks(title="Luganda TTS") as demo:
     gr.Markdown("# 🌍 Luganda Text-to-Speech\nType Luganda, click **Generate**, and listen/download the audio.")
-    with gr.Row():
-        text = gr.Textbox(label="Luganda text", lines=6, value="Ngenda mu kibuga Kampala olunaku lwa leero.")
-    with gr.Row():
-        btn = gr.Button("Generate", variant="primary")
-    with gr.Row():
-        out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
-        out_mp3 = gr.File(label="Download MP3", interactive=False)
     status = gr.Markdown("Ready.")
     btn.click(fn=tts_luganda, inputs=text, outputs=[out_wav, out_mp3, status])
-# Make public queue robust when multiple users test at once
-demo.queue(concurrency_count=1, max_size=8).launch()

 import time, os, shutil, subprocess, tempfile
+import numpy as np
 import gradio as gr
+import soundfile as sf
 import torch
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
 def _have_ffmpeg():
     return shutil.which("ffmpeg") is not None
+def _save_wav_np(path, wav_tensor):
+    """Save float32 mono [-1,1] to WAV using soundfile (no torchaudio backend needed)."""
+    x = wav_tensor.detach().cpu().numpy().astype(np.float32)
+    sf.write(path, x, SAMPLE_RATE, subtype="PCM_16")
 def tts_luganda(text):
     text = (text or "").strip()
     if not text:
     # Synthesize
     mel = _ensure_mel_shape(taco.encode_text(text))
+    wav = vocoder.decode_batch(mel)[0].squeeze(0)  # 1D torch tensor
     # Save a temporary WAV
     ts = int(time.time())
+    base = f"luganda_tts_{ts}"
     wav_path = os.path.join(tempfile.gettempdir(), base + ".wav")
+    _save_wav_np(wav_path, wav)
+    # Optional MP3 via ffmpeg
     mp3_path = None
     if _have_ffmpeg():
         mp3_path = os.path.join(tempfile.gettempdir(), base + ".mp3")
     else:
         status += " (WAV ready)"
     return wav_path, (mp3_path if mp3_path else None), status
 with gr.Blocks(title="Luganda TTS") as demo:
     gr.Markdown("# 🌍 Luganda Text-to-Speech\nType Luganda, click **Generate**, and listen/download the audio.")
+    text = gr.Textbox(label="Luganda text", lines=6, value="Ngenda mu kibuga Kampala olunaku lwa leero.")
+    btn = gr.Button("Generate", variant="primary")
+    out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
+    out_mp3 = gr.File(label="Download MP3", interactive=False)
     status = gr.Markdown("Ready.")
     btn.click(fn=tts_luganda, inputs=text, outputs=[out_wav, out_mp3, status])
+# Just enable queue with defaults (no unsupported args)
+demo.queue().launch()