| import torch |
| import torchaudio |
| import numpy as np |
| import gradio as gr |
| from f5_tts.api import F5TTS |
| from f5_tts.model.utils import convert_char_to_pinyin |
| from f5_tts.infer.utils_infer import transcribe |
|
|
| |
# One engine instance is created at import time and shared by every
# generate_speech() call.
tts = F5TTS()

# Checkpoint loading is best-effort: a failure is reported on stdout and the
# module continues to import.
try:
    tts.load_ema_model("hindi_tts_checkpoint.bin")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
| |
| |
def generate_speech(text):
    """Synthesize speech for Hindi text with the module-level ``tts`` engine.

    The text is passed through ``convert_char_to_pinyin``, flattened into a
    single space-separated string and handed to ``tts.infer`` together with a
    fixed reference recording and its transcript.

    Args:
        text: The text to speak. A list of strings is joined with spaces.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, with ``waveform`` as a NumPy
        array.

    Raises:
        gr.Error: If ``text`` is empty or inference fails. The Gradio UI shows
            the message to the user; returning the message as a plain string
            would instead be interpreted by ``gr.Audio`` as a file path.
    """
    # Reference voice sample and its transcript, used to condition the model.
    ref_audio_path = "E:/tts new/ref_audio.wav"
    ref_text = "यह संदर्भ ऑडियो का सामग्री, उपशीर्षक या लिप्यंतरण है।"

    if isinstance(text, list):
        text = ' '.join(text)

    # Reject blank input up front instead of sending it to the model.
    if text is None or not str(text).strip():
        raise gr.Error("Please enter some Hindi text.")

    # NOTE(review): upstream convert_char_to_pinyin appears to expect a list
    # of strings and the engine may already apply it internally -- confirm
    # that pre-converting a bare string here is intended.
    converted = convert_char_to_pinyin(text)

    if isinstance(converted, list):
        # Flatten one level: nested lists are expanded, other items are kept.
        tokens = []
        for entry in converted:
            if isinstance(entry, list):
                tokens.extend(entry)
            else:
                tokens.append(entry)
        converted = tokens

    # A plain string is already in final form; joining it would insert a
    # space between every character.
    gen_text = converted if isinstance(converted, str) else ' '.join(converted)

    try:
        result = tts.infer(ref_audio_path, ref_text, gen_text)
        # Index rather than unpack so extra trailing return values are
        # tolerated.
        # NOTE(review): upstream F5TTS.infer is believed to return
        # (wav, sr, spectrogram) -- confirm against the installed version.
        output_waveform, sampling_rate = result[0], result[1]
        if isinstance(output_waveform, torch.Tensor):
            # .numpy() alone fails for tensors on an accelerator or tensors
            # that are part of the autograd graph.
            output_waveform = output_waveform.detach().cpu().numpy()
        else:
            output_waveform = np.asarray(output_waveform)
    except Exception as e:
        raise gr.Error(f"Error in speech generation: {e}") from e

    return (sampling_rate, output_waveform)
|
|
| |
# Gradio front end: one text box feeds generate_speech and the result is
# rendered in an audio component.
iface = gr.Interface(
    title="Hindi Text-to-Speech (TTS)",
    description="Enter Hindi text, and the model will generate a speech output.",
    fn=generate_speech,
    inputs=gr.Textbox(label="Enter Hindi Text"),
    outputs=gr.Audio(label="Generated Speech"),
)

# Launch the web UI only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    iface.launch()
|
|