DhanuakaDev committed on
Commit
eda855d
·
1 Parent(s): 43e61e8

changed6 app.py

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. app.py +39 -20
  3. checkpoint_80000.pth +0 -3
  4. requirements.txt +1 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -1,36 +1,55 @@
1
- import json
2
  import os
 
3
 
4
  import numpy as np
5
  import gradio as gr
6
- from TTS.utils.synthesizer import Synthesizer
 
7
 
8
- # ---------- Paths ----------
9
- MODEL_PATH = "checkpoint_80000.pth" # or "best_model_23206.pth"
10
- CONFIG_PATH = "config.json"
 
11
 
12
- # ---------- Load config to get sample rate ----------
13
- with open(CONFIG_PATH, "r", encoding="utf-8") as f:
14
- cfg = json.load(f)
15
 
16
- SAMPLE_RATE = cfg.get("audio", {}).get("sample_rate", 24000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # ---------- Load Coqui TTS Synthesizer ----------
19
- synthesizer = Synthesizer(
20
- tts_checkpoint=MODEL_PATH,
21
- tts_config_path=CONFIG_PATH,
22
- use_cuda=False, # Set True only if you enable GPU in the Space
 
23
  )
24
 
 
 
25
  # ---------- Inference function ----------
26
  def tts_generate(text: str):
27
- if not text.strip():
 
28
  return None
29
 
30
- # Whatever Coqui returns (list, numpy array, etc.)
31
- wav = synthesizer.tts(text)
32
 
33
- # Convert to a 1D numpy array for Gradio
34
  wav = np.asarray(wav, dtype="float32").flatten()
35
 
36
  # Gradio Audio(type="numpy") expects (sample_rate, np.ndarray)
@@ -48,8 +67,8 @@ demo = gr.Interface(
48
  label="Generated speech",
49
  type="numpy",
50
  ),
51
- title="Sinhala TTS ",
52
- description="Sinhala TTS model research-stage model.",
53
  )
54
 
55
  if __name__ == "__main__":
 
 
1
  import os
2
+ from pathlib import Path
3
 
4
  import numpy as np
5
  import gradio as gr
6
+ from huggingface_hub import hf_hub_download
7
+ from TTS.api import TTS
8
 
9
+ # ---------- Config for your private model repo ----------
10
+ REPO_ID = "DhanuakaDev/SinTts-prev-v0.1" # private model repo
11
+ CHECKPOINT_FILENAME = "checkpoint_80000.pth" # change if your file name differs
12
+ CONFIG_FILENAME = "config.json"
13
 
14
+ # Get token from Space secret (Settings -> Variables and secrets)
15
+ HF_TOKEN = os.environ.get("HF_TOKEN")
 
16
 
17
+ # ---------- Download files from private repo ----------
18
+ # hf_hub_download returns a local path in the cache
19
+ checkpoint_path = hf_hub_download(
20
+ repo_id=REPO_ID,
21
+ filename=CHECKPOINT_FILENAME,
22
+ token=HF_TOKEN, # required for private repos
23
+ repo_type="model", # explicit, though "model" is default
24
+ )
25
+
26
+ config_path = hf_hub_download(
27
+ repo_id=REPO_ID,
28
+ filename=CONFIG_FILENAME,
29
+ token=HF_TOKEN,
30
+ repo_type="model",
31
+ )
32
 
33
+ # ---------- Load TTS model (same style as your local script) ----------
34
+ tts = TTS(
35
+ model_path=str(checkpoint_path),
36
+ config_path=str(config_path),
37
+ progress_bar=False,
38
+ gpu=False, # Space uses CPU; enable GPU only if you switch hardware
39
  )
40
 
41
+ SAMPLE_RATE = tts.synthesizer.output_sample_rate
42
+
43
  # ---------- Inference function ----------
44
  def tts_generate(text: str):
45
+ text = text.strip()
46
+ if not text:
47
  return None
48
 
49
+ # Generate audio (same call as in your local script)
50
+ wav = tts.tts(text)
51
 
52
+ # Ensure numpy 1D array for Gradio
53
  wav = np.asarray(wav, dtype="float32").flatten()
54
 
55
  # Gradio Audio(type="numpy") expects (sample_rate, np.ndarray)
 
67
  label="Generated speech",
68
  type="numpy",
69
  ),
70
+ title="Sinhala TTS (Coqui VITS)",
71
+ description="Fine-tuned Sinhala TTS model using Coqui-TTS.",
72
  )
73
 
74
  if __name__ == "__main__":
checkpoint_80000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0952ffa374cd2a2c48b9f6e7c7b917052e1502a21d19f675b2523a34d66fbe6
3
- size 997797878
 
 
 
 
requirements.txt CHANGED
@@ -9,3 +9,4 @@ soundfile
9
  librosa
10
  numpy
11
  scipy
 
 
9
  librosa
10
  numpy
11
  scipy
12
+ huggingface_hub