# TTS_Hindi / app.py
# NLPV's picture
# Update app.py
# 40dde11 verified
import os
import tempfile
import gradio as gr
import librosa
import soundfile as sf
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# This will delete cached model to force a clean download
import subprocess
# Remove any cached copy of the XTTS v2 model so the next model load has to
# fetch it again. The removal uses the standard library rather than shelling
# out to `rm -rf`, so it does not depend on a POSIX `rm` binary being present.
# NOTE(review): this runs on every start-up, so the model is re-fetched each
# time the app boots; drop this block once the cache is known to be clean.
import shutil

model_cache_path = os.path.expanduser(
    "~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2"
)
if os.path.exists(model_cache_path):
    if os.path.isdir(model_cache_path) and not os.path.islink(model_cache_path):
        shutil.rmtree(model_cache_path)
    else:
        # A plain file or a symlink sits at the cache location; unlink it
        # (rmtree refuses to operate on these).
        os.remove(model_cache_path)
# ===== Step 1: Allowlist Required Classes for PyTorch >= 2.6 =====
from torch.serialization import add_safe_globals
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
from TTS.config.shared_configs import BaseDatasetConfig
# Register the XTTS configuration classes with torch's serialization
# allowlist before the model is loaded.
add_safe_globals([XttsConfig, XttsAudioConfig, XttsArgs, BaseDatasetConfig])
# ===== Step 2: Agree to Coqui TTS Terms of Service =====
# Exported before the TTS package is imported and the model is constructed.
# NOTE(review): presumably read by Coqui TTS when it fetches the model - confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

# ===== Step 3: Load the Coqui XTTS Model =====
from TTS.api import TTS

# Identifier of the multilingual XTTS v2 model used for voice cloning.
XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# One shared model instance for the whole app; CPU inference by default
# (switch gpu to True if using CUDA).
tts = TTS(model_name=XTTS_MODEL_NAME, progress_bar=True, gpu=False)
# ===== Step 4: Define Voice Cloning Inference Function =====
def text_to_speech_clone(text, voice_sample):
    """Synthesize Hindi speech for ``text`` in the voice of ``voice_sample``.

    Args:
        text: The text to be spoken (synthesized with language ``"hi"``).
        voice_sample: Filesystem path of the reference voice recording, or
            ``None`` when the user has not supplied one.

    Returns:
        Path of a temporary WAV file containing the generated speech. The
        file is left on disk on purpose so the caller can read it.

    Raises:
        gr.Error: If no voice sample or no text was provided. Gradio shows
            the message to the user; the interface has a single audio
            output, so a ``(message, None)`` return value could not be
            rendered.
    """
    if voice_sample is None:
        raise gr.Error("Please provide a voice sample audio.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Load the reference recording at a 22050 Hz sample rate.
    sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)

    # Reserve both temp paths and close the handles immediately: the writers
    # below reopen the files by name, which fails on platforms that do not
    # allow a second open while NamedTemporaryFile still holds the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_sample:
        voice_sample_path = tmp_sample.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_output:
        output_path = tmp_output.name

    try:
        # Save the sample as WAV in the format handed to the model.
        sf.write(voice_sample_path, sample_wav, sample_rate)
        # Generate the cloned Hindi speech into the output temp file.
        tts.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=voice_sample_path,
            language="hi",
        )
    except Exception:
        # Synthesis failed: do not leave an empty/partial output file behind.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    finally:
        # The converted sample is only needed during synthesis; without this
        # every request would leak one temp file.
        if os.path.exists(voice_sample_path):
            os.remove(voice_sample_path)

    return output_path
# ===== Step 5: Gradio UI Interface =====
# Input widgets: the text to speak and the reference voice recording.
text_input = gr.Textbox(
    lines=5,
    placeholder="हिंदी में टेक्स्ट दर्ज करें...",
    label="Text",
)
voice_input = gr.Audio(type="filepath", label="Voice Sample (Hindi speech)")

# Output widget: plays the generated audio file returned by the function.
speech_output = gr.Audio(type="filepath", label="Generated Cloned Speech")

# Description shown under the title (Hindi, two lines).
app_description = (
    "यह ऐप हिंदी टेक्स्ट से वॉयस क्लोनिंग के साथ स्पीच जेनरेट करता है।\n"
    "एक छोटी सी हिंदी आवाज़ की रिकॉर्डिंग (5-10 सेकंड) अपलोड करें, और यह उसी आवाज़ में टेक्स्ट पढ़कर सुनाएगा।"
)

# Wire the voice-cloning function to the widgets above.
iface = gr.Interface(
    fn=text_to_speech_clone,
    inputs=[text_input, voice_input],
    outputs=speech_output,
    title="Hindi Text-to-Speech with Voice Cloning",
    description=app_description,
)

# ===== Step 6: Launch the Web App =====
iface.launch()