Spaces:
Sleeping
Sleeping
import os
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import AutoProcessor, VitsModel
# Hugging Face access token read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Language name -> Hugging Face model id of its TTS checkpoint.
TTS_MODELS = dict(
    yoruba="facebook/mms-tts-yor",
    hausa="facebook/mms-tts-hau",
)

# Language name -> {"processor": ..., "model": ...}; filled in at start-up.
tts_engines = dict()
# Load every configured checkpoint once at start-up so that requests only
# pay for inference, not for model loading.
for lang, model_id in TTS_MODELS.items():
    print(f"Loading TTS model for {lang}...")
    tts_engines[lang] = {
        "processor": AutoProcessor.from_pretrained(model_id, token=HF_TOKEN),
        # Move the weights to the target device and switch to eval mode;
        # both calls return the module itself, so they can be chained.
        "model": VitsModel.from_pretrained(model_id, token=HF_TOKEN)
        .to(DEVICE)
        .eval(),
    }

print("All TTS models loaded successfully")
def synthesize_speech(text, language):
    """Synthesize ``text`` in ``language`` and return the path of a WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text
            produces no audio.
        language: Key into ``tts_engines``, matched case-insensitively.
            ``None`` or empty (e.g. no dropdown selection) produces no audio.

    Returns:
        Path to a newly created WAV file holding the generated waveform, or
        ``None`` when there is nothing to synthesize or no engine is loaded
        for ``language``.
    """
    # Guard against missing values before calling string methods on them:
    # the language dropdown has no default, so it can arrive as None.
    if not text or not text.strip():
        return None
    if not language:
        return None

    engine = tts_engines.get(language.lower())
    if engine is None:
        return None

    processor = engine["processor"]
    model = engine["model"]

    inputs = processor(text=text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        output = model(**inputs)
    audio = output.waveform.squeeze().cpu().numpy()

    # Write each result to its own file: a single shared path would let
    # concurrent requests overwrite one another's audio. The file is left
    # on disk (delete=False) so the caller can read it after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    # Take the sample rate from the checkpoint instead of hard-coding it, so
    # the WAV header always matches what the model actually produced.
    sf.write(output_path, audio, model.config.sampling_rate)
    return output_path
# Input widgets: free text plus the language whose TTS engine should speak it.
_text_input = gr.Textbox(label="Text")
_language_input = gr.Dropdown(choices=["yoruba", "hausa"], label="Language")

# Output widget: plays the WAV file whose path synthesize_speech returns.
_speech_output = gr.Audio(type="filepath", label="Generated Speech")

demo = gr.Interface(
    fn=synthesize_speech,
    inputs=[_text_input, _language_input],
    outputs=_speech_output,
    title="HealthAtlas Nigerian TTS Service",
    description="Text → Speech (Yoruba & Hausa)",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()