Spaces:

NLPV
/

TTS_Hindi

Runtime error

App Files Files Community

TTS_Hindi / app.py

NLPV

Update app.py

40dde11 verified 10 months ago

raw

history blame contribute delete

3.11 kB

	import os
	import tempfile
	import gradio as gr
	import librosa
	import soundfile as sf
	import warnings
	warnings.filterwarnings("ignore", category=FutureWarning)
	# This will delete cached model to force a clean download
	import subprocess

	# Purge any cached XTTS v2 checkpoint so the next model load fetches a fresh
	# copy.
	# NOTE(review): this runs on every start-up, so the model is re-downloaded
	# each time the app boots; drop this block once the cache is known-good.
	import shutil

	model_cache_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")

	# Use the standard library instead of shelling out to `rm -rf`: no child
	# process, and it also works on platforms without an `rm` binary.
	if os.path.islink(model_cache_path) or os.path.isfile(model_cache_path):
	    # A stray file or symlink at the cache location: remove just that entry.
	    os.remove(model_cache_path)
	elif os.path.isdir(model_cache_path):
	    shutil.rmtree(model_cache_path)



	# ===== Step 1: Allowlist Required Classes for PyTorch >= 2.6 =====
	from torch.serialization import add_safe_globals
	from TTS.tts.configs.xtts_config import XttsConfig
	from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
	from TTS.config.shared_configs import BaseDatasetConfig

	# Classes that are registered with torch's serialization allowlist before
	# the XTTS model is loaded further down.
	_xtts_safe_classes = [
	    XttsConfig,
	    XttsAudioConfig,
	    XttsArgs,
	    BaseDatasetConfig,
	]
	add_safe_globals(_xtts_safe_classes)

	# ===== Step 2: Accept the Coqui TTS Terms of Service =====
	# The flag is written to the environment before the model is constructed
	# below.
	os.environ["COQUI_TOS_AGREED"] = "1"

	# ===== Step 3: Instantiate the Coqui XTTS v2 Model =====
	from TTS.api import TTS

	# gpu=False selects CPU inference; switch to True when CUDA is available.
	tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True, gpu=False)

	# ===== Step 4: Define Voice Cloning Inference Function =====
	def _remove_quietly(path):
	    """Delete ``path`` as best-effort cleanup; a failed removal is ignored."""
	    try:
	        os.remove(path)
	    except OSError:
	        # Cleanup must never mask the real result or the real error.
	        pass


	def text_to_speech_clone(text, voice_sample):
	    """Generate Hindi speech for ``text`` in the voice heard in ``voice_sample``.

	    Args:
	        text: The text to synthesize.
	        voice_sample: Path to the reference recording, or ``None`` when no
	            audio was supplied.

	    Returns:
	        Path to a temporary ``.wav`` file holding the generated speech.

	    Raises:
	        gr.Error: If the voice sample is missing or the text is blank. The
	            function feeds a single audio output, so problems are reported
	            as an error rather than returned as an extra value.
	    """
	    if voice_sample is None:
	        raise gr.Error("Please provide a voice sample audio.")
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    # Decode the reference first, resampled to 22.05 kHz; if this fails no
	    # temporary files have been created yet.
	    sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)

	    # Reserve both temp paths and close the handles right away, so the
	    # libraries below write to files that are not simultaneously held open.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_sample:
	        voice_sample_path = tmp_sample.name
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_output:
	        output_path = tmp_output.name

	    try:
	        # Re-save the reference as WAV so the model gets a consistent format.
	        sf.write(voice_sample_path, sample_wav, sample_rate)
	        tts.tts_to_file(
	            text=text,
	            file_path=output_path,
	            speaker_wav=voice_sample_path,
	            language="hi",
	        )
	    except Exception:
	        # Do not leave a partial or empty output file behind on failure.
	        _remove_quietly(output_path)
	        raise
	    finally:
	        # The re-encoded reference is only needed during synthesis.
	        _remove_quietly(voice_sample_path)

	    return output_path

	# ===== Step 5: Gradio UI Interface =====
	# Input widgets: a multi-line text box and a reference recording that is
	# handed to the callback as a file path.
	_text_input = gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Text")
	_voice_input = gr.Audio(type="filepath", label="Voice Sample (Hindi speech)")

	# Output widget: plays the audio file whose path the callback returns.
	_speech_output = gr.Audio(type="filepath", label="Generated Cloned Speech")

	# Two-line blurb shown under the title.
	_app_description = (
	    "यह ऐप हिंदी टेक्स्ट से वॉयस क्लोनिंग के साथ स्पीच जेनरेट करता है।\n"
	    "एक छोटी सी हिंदी आवाज़ की रिकॉर्डिंग (5-10 सेकंड) अपलोड करें, और यह उसी आवाज़ में टेक्स्ट पढ़कर सुनाएगा।"
	)

	iface = gr.Interface(
	    fn=text_to_speech_clone,
	    inputs=[_text_input, _voice_input],
	    outputs=_speech_output,
	    title="Hindi Text-to-Speech with Voice Cloning",
	    description=_app_description,
	)

	# ===== Step 6: Start the Web App =====
	iface.launch()