DhanuakaDev committed on
Commit
ccf3842
·
1 Parent(s): 55e5fd0

changed app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -1,27 +1,25 @@
1
  import json
2
  import os
3
 
 
4
  import gradio as gr
5
  from TTS.utils.synthesizer import Synthesizer
6
 
7
  # ---------- Paths ----------
8
- # Make sure these filenames match exactly what is in your Space
9
- MODEL_PATH = "checkpoint_80000.pth" # or "best_model_23206.pth"
10
  CONFIG_PATH = "config.json"
11
 
12
  # ---------- Load config to get sample rate ----------
13
  with open(CONFIG_PATH, "r", encoding="utf-8") as f:
14
  cfg = json.load(f)
15
 
16
- # Coqui configs usually store this like cfg["audio"]["sample_rate"]
17
  SAMPLE_RATE = cfg.get("audio", {}).get("sample_rate", 24000)
18
 
19
  # ---------- Load Coqui TTS Synthesizer ----------
20
- # For a basic VITS TTS model with no separate vocoder / speakers file:
21
  synthesizer = Synthesizer(
22
  tts_checkpoint=MODEL_PATH,
23
  tts_config_path=CONFIG_PATH,
24
- use_cuda=False, # Spaces CPU; set True only if you enable GPU hardware
25
  )
26
 
27
  # ---------- Inference function ----------
@@ -29,10 +27,22 @@ def tts_generate(text: str):
29
  if not text.strip():
30
  return None
31
 
32
- # Synthesizer.tts returns a numpy array with audio samples
33
  wav = synthesizer.tts(text)
34
 
35
- # Gradio Audio (type="numpy") expects (sample_rate, np.ndarray)
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  return (SAMPLE_RATE, wav)
37
 
38
  # ---------- Gradio UI ----------
@@ -45,7 +55,7 @@ demo = gr.Interface(
45
  ),
46
  outputs=gr.Audio(
47
  label="Generated speech",
48
- type="numpy",
49
  ),
50
  title="Sinhala TTS (Coqui VITS)",
51
  description="Fine-tuned Sinhala TTS model using Coqui-TTS.",
 
1
  import json
2
  import os
3
 
4
+ import numpy as np # NEW: import numpy
5
  import gradio as gr
6
  from TTS.utils.synthesizer import Synthesizer
7
 
8
  # ---------- Paths ----------
9
+ MODEL_PATH = "checkpoint_80000.pth" # or "best_model_23206.pth"
 
10
  CONFIG_PATH = "config.json"
11
 
12
  # ---------- Load config to get sample rate ----------
13
  with open(CONFIG_PATH, "r", encoding="utf-8") as f:
14
  cfg = json.load(f)
15
 
 
16
  SAMPLE_RATE = cfg.get("audio", {}).get("sample_rate", 24000)
17
 
18
  # ---------- Load Coqui TTS Synthesizer ----------
 
19
  synthesizer = Synthesizer(
20
  tts_checkpoint=MODEL_PATH,
21
  tts_config_path=CONFIG_PATH,
22
+ use_cuda=False, # Set True only if you enable GPU hardware in the Space
23
  )
24
 
25
  # ---------- Inference function ----------
 
27
  if not text.strip():
28
  return None
29
 
 
30
  wav = synthesizer.tts(text)
31
 
32
+ # Coqui sometimes returns:
33
+ # - list of floats
34
+ # - or list of numpy arrays (one per sentence)
35
+ if isinstance(wav, list):
36
+ if len(wav) == 0:
37
+ return None
38
+ if hasattr(wav[0], "dtype"):
39
+ # list of numpy arrays -> concatenate
40
+ wav = np.concatenate(wav)
41
+ else:
42
+ # list of floats -> convert to numpy array
43
+ wav = np.array(wav, dtype="float32")
44
+
45
+ # Ensure it's a 1D numpy array for Gradio
46
  return (SAMPLE_RATE, wav)
47
 
48
  # ---------- Gradio UI ----------
 
55
  ),
56
  outputs=gr.Audio(
57
  label="Generated speech",
58
+ type="numpy", # Gradio expects (sr, np.ndarray)
59
  ),
60
  title="Sinhala TTS (Coqui VITS)",
61
  description="Fine-tuned Sinhala TTS model using Coqui-TTS.",