# tts / app.py
# Abhi2025's picture
# Create app.py
# c4ccf95 verified
import torch
import torchaudio
import numpy as np
import gradio as gr
from f5_tts.api import F5TTS
from f5_tts.model.utils import convert_char_to_pinyin
from f5_tts.infer.utils_infer import transcribe
# Build the F5-TTS wrapper once at import time; generate_speech reuses it.
tts = F5TTS()

# Attempt to load the Hindi checkpoint from the working directory. A failure
# is only printed, so the script keeps running with whatever state F5TTS()
# produced on its own.
# NOTE(review): confirm that load_ema_model accepts a single checkpoint path
# in the installed f5_tts version.
try:
    tts.load_ema_model("hindi_tts_checkpoint.bin")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
# Default voice-cloning reference used when the caller does not supply one.
# NOTE(review): the audio default is an absolute Windows path; it must exist
# on the machine that runs the app -- confirm, or override via the parameters.
_DEFAULT_REF_AUDIO_PATH = "E:/tts new/ref_audio.wav"
_DEFAULT_REF_TEXT = "यह संदर्भ ऑडियो का सामग्री, उपशीर्षक या लिप्यंतरण है।"


def generate_speech(
    text,
    ref_audio_path=_DEFAULT_REF_AUDIO_PATH,
    ref_text=_DEFAULT_REF_TEXT,
):
    """Convert Hindi text to speech and return the generated audio.

    Args:
        text: Text to synthesize. A list/tuple of strings is joined with
            single spaces into one string.
        ref_audio_path: Path of the reference audio clip passed to the model.
        ref_text: Transcript of the reference audio clip.

    Returns:
        A ``(sampling_rate, waveform)`` tuple where ``waveform`` is a NumPy
        array, i.e. the tuple form handed to the ``gr.Audio`` output.

    Raises:
        gr.Error: If the text is empty or synthesis fails. Returning an error
            string instead would be handed to the audio output component as
            if it were audio data.
    """
    # Ensure text is a single string (not a list).
    if isinstance(text, (list, tuple)):
        text = " ".join(str(part) for part in text)
    text = (text or "").strip()
    if not text:
        raise gr.Error("Please enter some Hindi text to synthesize.")

    # The raw text is passed through unchanged: pre-converting it and joining
    # the pieces with spaces would put a space between every character.
    # NOTE(review): this relies on tts.infer tokenizing the text itself --
    # confirm against the installed f5_tts version.
    try:
        result = tts.infer(ref_audio_path, ref_text, text)
        # Index instead of unpacking into a fixed number of names so an extra
        # trailing item (e.g. a spectrogram) does not raise.
        waveform, sampling_rate = result[0], result[1]
        if isinstance(waveform, torch.Tensor):
            waveform = waveform.detach().cpu().numpy()
        else:
            waveform = np.asarray(waveform)
    except Exception as e:
        raise gr.Error(f"Error in speech generation: {e}") from e

    return (sampling_rate, waveform)
# Gradio UI: one text box in, one audio player out, wired to generate_speech.
_text_input = gr.Textbox(label="Enter Hindi Text")
_audio_output = gr.Audio(label="Generated Speech")

iface = gr.Interface(
    fn=generate_speech,
    inputs=_text_input,
    outputs=_audio_output,
    title="Hindi Text-to-Speech (TTS)",
    description="Enter Hindi text, and the model will generate a speech output.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()