import os
import shutil
import subprocess
import tempfile
import warnings

import gradio as gr
import librosa
import soundfile as sf

warnings.filterwarnings("ignore", category=FutureWarning)

# Delete any previously downloaded XTTS v2 model directory before the model is
# loaded further down.
# NOTE(review): this runs on every start, so the model is presumably
# re-downloaded each time (likely a workaround for a stale or corrupt cache) --
# confirm it is still wanted.
model_cache_path = os.path.expanduser(
    "~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2"
)

if os.path.exists(model_cache_path):
    # Pure-Python removal instead of shelling out to `rm -rf`: no external
    # binary is required and no child process is spawned.
    if os.path.isdir(model_cache_path) and not os.path.islink(model_cache_path):
        shutil.rmtree(model_cache_path)
    else:
        # A plain file or a symlink: unlink it rather than following it.
        os.remove(model_cache_path)

from torch.serialization import add_safe_globals
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
from TTS.config.shared_configs import BaseDatasetConfig

# Classes registered with torch's safe-globals allowlist before the model is
# loaded.
# NOTE(review): presumably these are the config objects pickled inside the
# XTTS checkpoint, which a weights-only `torch.load` would otherwise reject --
# confirm against the installed torch/TTS versions.
_XTTS_SAFE_GLOBALS = [
    XttsConfig,
    XttsAudioConfig,
    XttsArgs,
    BaseDatasetConfig,
]
add_safe_globals(_XTTS_SAFE_GLOBALS)

# Set before the TTS import/model load below.
# NOTE(review): presumably this pre-accepts the Coqui terms of service so the
# model download does not stop for an interactive prompt -- confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

from TTS.api import TTS

# Identifier of the multilingual XTTS v2 model used for voice cloning.
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Module-level model instance shared by every request; loaded once at startup
# with GPU use disabled.
tts = TTS(model_name=_XTTS_MODEL_NAME, progress_bar=True, gpu=False)


def _discard_file(path):
    """Delete ``path`` if possible; cleanup is best-effort and never raises."""
    try:
        os.remove(path)
    except OSError:
        # Deliberately ignored: failing to delete a scratch file must not
        # mask the real result (or the real error) of the request.
        pass


def text_to_speech_clone(text, voice_sample):
    """Speak ``text`` in Hindi using the voice from ``voice_sample``.

    Args:
        text: The text to synthesize.
        voice_sample: Path of the reference recording, or ``None`` when the
            user supplied no audio.

    Returns:
        Path of a newly created ``.wav`` file holding the generated speech.
        The file is intentionally left on disk so the caller can read it.

    Raises:
        gr.Error: If no voice sample was provided or ``text`` is empty.
            Gradio shows the message to the user instead of a traceback.
    """
    if voice_sample is None:
        # The interface has a single audio output, so a (message, None) tuple
        # cannot be rendered; surface the problem as a Gradio error instead.
        raise gr.Error("Please provide a voice sample audio.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Resample the reference recording to 22.05 kHz before handing it over.
    sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)

    # mkstemp + close gives us unique paths with no handle left open, so the
    # audio libraries can reopen the files by name on every platform.
    sample_fd, voice_sample_path = tempfile.mkstemp(suffix=".wav")
    os.close(sample_fd)
    output_fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(output_fd)

    try:
        sf.write(voice_sample_path, sample_wav, sample_rate)
        tts.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=voice_sample_path,
            language="hi",
        )
    except Exception:
        # Nothing will be returned, so do not leave the output file behind.
        _discard_file(output_path)
        raise
    finally:
        # The resampled speaker clip is only needed during synthesis.
        _discard_file(voice_sample_path)

    return output_path


# Input widgets: the text to speak and the reference voice recording
# (delivered to the callback as a file path).
_text_input = gr.Textbox(
    lines=5,
    placeholder="हिंदी में टेक्स्ट दर्ज करें...",
    label="Text",
)
_voice_input = gr.Audio(type="filepath", label="Voice Sample (Hindi speech)")

# Output widget: plays the generated audio file.
_speech_output = gr.Audio(type="filepath", label="Generated Cloned Speech")

# User-facing description shown under the title (Hindi).
_description = (
    "यह ऐप हिंदी टेक्स्ट से वॉयस क्लोनिंग के साथ स्पीच जेनरेट करता है।\n"
    "एक छोटी सी हिंदी आवाज़ की रिकॉर्डिंग (5-10 सेकंड) अपलोड करें, और यह उसी आवाज़ में टेक्स्ट पढ़कर सुनाएगा।"
)

# Web UI wiring the two inputs to text_to_speech_clone and its audio output.
iface = gr.Interface(
    fn=text_to_speech_clone,
    inputs=[_text_input, _voice_input],
    outputs=_speech_output,
    title="Hindi Text-to-Speech with Voice Cloning",
    description=_description,
)

# Start the Gradio server as soon as the script is executed.
iface.launch()