ayscript committed on
Commit
0ce63df
·
verified ·
1 Parent(s): b5fe085

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import AutoProcessor, VitsModel
6
+
7
+
8
# Hugging Face access token read from the environment; None when HF_TOKEN
# is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Run inference on the GPU when one is visible to torch, otherwise on CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Supported languages (lower-case key) mapped to the Hugging Face model id
# that synthesizes speech for that language.
TTS_MODELS = {
    "yoruba": "facebook/mms-tts-yor",
    "hausa": "facebook/mms-tts-hau",
}
15
+
16
+
17
# Registry of loaded engines, keyed by the language names in TTS_MODELS.
# Each value is {"processor": ..., "model": ...}.
tts_engines = {}

# Every model is loaded once, at import time, so that requests only pay for
# inference and never for model loading.
for lang, model_id in TTS_MODELS.items():
    print(f"Loading TTS model for {lang}...")

    processor = AutoProcessor.from_pretrained(
        model_id,
        token=HF_TOKEN,
    )

    model = VitsModel.from_pretrained(
        model_id,
        token=HF_TOKEN,
    ).to(DEVICE)

    # Put the model in evaluation mode; it is only ever used for inference.
    model.eval()

    tts_engines[lang] = {
        "processor": processor,
        "model": model,
    }

print("All TTS models loaded successfully")
40
+
41
+
42
def synthesize_speech(text, language):
    """Synthesize *text* in *language* and return the path of a WAV file.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text
            produces no audio.
        language: Name of a loaded language (a key of ``tts_engines``),
            matched case-insensitively. ``None`` or an empty value (for
            example, a dropdown with nothing selected) produces no audio.

    Returns:
        The filesystem path of a newly written WAV file, or ``None`` when
        there is nothing to synthesize or the language is not supported.
    """
    # Guard against missing values before calling any str methods on them:
    # the UI can pass None when a field has not been filled in.
    if not text or not text.strip():
        return None
    if not language:
        return None

    engine = tts_engines.get(language.strip().lower())
    if engine is None:
        return None

    processor = engine["processor"]
    model = engine["model"]

    inputs = processor(
        text=text,
        return_tensors="pt",
    ).to(DEVICE)

    with torch.no_grad():
        output = model(**inputs)

    audio = output.waveform.squeeze().cpu().numpy()

    # Write each result to its own temporary file. A single shared file name
    # would let concurrent requests overwrite one another's audio.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Use the sampling rate the model was configured with rather than a
    # hard-coded value, so the WAV header always matches the waveform.
    sf.write(output_path, audio, model.config.sampling_rate)

    return output_path
67
+
68
+
69
+ demo = gr.Interface(
70
+ fn=synthesize_speech,
71
+ inputs=[
72
+ gr.Textbox(label="Text"),
73
+ gr.Dropdown(
74
+ choices=["yoruba", "hausa"],
75
+ label="Language"
76
+ )
77
+ ],
78
+ outputs=gr.Audio(type="filepath", label="Generated Speech"),
79
+ title="HealthAtlas Nigerian TTS Service",
80
+ description="Text → Speech (Yoruba & Hausa)",
81
+ allow_flagging="never"
82
+ )
83
+
84
+
85
+ if __name__ == "__main__":
86
+ demo.launch()