Spaces:

DhanuakaDev
/

Sin-Tts-v01

Runtime error

App Files Files

DhanuakaDev committed on Nov 27, 2025

Commit

ccf3842

1 Parent(s): 55e5fd0

changed app.py

Browse files

Files changed (1) hide show

app.py +18 -8

app.py CHANGED Viewed

@@ -1,27 +1,25 @@
 import json
 import os
 import gradio as gr
 from TTS.utils.synthesizer import Synthesizer
 # ---------- Paths ----------
-# Make sure these filenames match exactly what is in your Space
-MODEL_PATH = "checkpoint_80000.pth"        # or "best_model_23206.pth"
 CONFIG_PATH = "config.json"
 # ---------- Load config to get sample rate ----------
 with open(CONFIG_PATH, "r", encoding="utf-8") as f:
     cfg = json.load(f)
-# Coqui configs usually store this like cfg["audio"]["sample_rate"]
 SAMPLE_RATE = cfg.get("audio", {}).get("sample_rate", 24000)
 # ---------- Load Coqui TTS Synthesizer ----------
-# For a basic VITS TTS model with no separate vocoder / speakers file:
 synthesizer = Synthesizer(
     tts_checkpoint=MODEL_PATH,
     tts_config_path=CONFIG_PATH,
-    use_cuda=False,   # Spaces CPU; set True only if you enable GPU hardware
 )
 # ---------- Inference function ----------
@@ -29,10 +27,22 @@ def tts_generate(text: str):
     if not text.strip():
         return None
-    # Synthesizer.tts returns a numpy array with audio samples
     wav = synthesizer.tts(text)
-    # Gradio Audio (type="numpy") expects (sample_rate, np.ndarray)
     return (SAMPLE_RATE, wav)
 # ---------- Gradio UI ----------
@@ -45,7 +55,7 @@ demo = gr.Interface(
     ),
     outputs=gr.Audio(
         label="Generated speech",
-        type="numpy",
     ),
     title="Sinhala TTS (Coqui VITS)",
     description="Fine-tuned Sinhala TTS model using Coqui-TTS.",

 import json
 import os
+import numpy as np          # NEW: import numpy
 import gradio as gr
 from TTS.utils.synthesizer import Synthesizer
 # ---------- Paths ----------
+MODEL_PATH = "checkpoint_80000.pth"   # or "best_model_23206.pth"
 CONFIG_PATH = "config.json"
 # ---------- Load config to get sample rate ----------
 with open(CONFIG_PATH, "r", encoding="utf-8") as f:
     cfg = json.load(f)
 SAMPLE_RATE = cfg.get("audio", {}).get("sample_rate", 24000)
 # ---------- Load Coqui TTS Synthesizer ----------
 synthesizer = Synthesizer(
     tts_checkpoint=MODEL_PATH,
     tts_config_path=CONFIG_PATH,
+    use_cuda=False,   # Set True only if you enable GPU hardware in the Space
 )
 # ---------- Inference function ----------
     if not text.strip():
         return None
     wav = synthesizer.tts(text)
+    # Coqui sometimes returns:
+    # - list of floats
+    # - or list of numpy arrays (one per sentence)
+    if isinstance(wav, list):
+        if len(wav) == 0:
+            return None
+        if hasattr(wav[0], "dtype"):
+            # list of numpy arrays -> concatenate
+            wav = np.concatenate(wav)
+        else:
+            # list of floats -> convert to numpy array
+            wav = np.array(wav, dtype="float32")
+    # Ensure it's a 1D numpy array for Gradio
     return (SAMPLE_RATE, wav)
 # ---------- Gradio UI ----------
     ),
     outputs=gr.Audio(
         label="Generated speech",
+        type="numpy",     # Gradio expects (sr, np.ndarray)
     ),
     title="Sinhala TTS (Coqui VITS)",
     description="Fine-tuned Sinhala TTS model using Coqui-TTS.",