Spaces:
Runtime error
Runtime error
| # app.py — VClone (HF Space, 1-column, persistent output, DownloadButton) | |
| import os, re, tempfile, shutil, time | |
| import numpy as np | |
| import soundfile as sf | |
| import gradio as gr | |
# Load custom CSS from the external stylesheet. The stylesheet is purely
# cosmetic, so a missing or unreadable file must not prevent the app from
# starting: fall back to no custom CSS and say so on stdout.
try:
    with open("style.css", encoding="utf-8") as f:
        custom_css = f.read()
except (OSError, UnicodeDecodeError) as css_err:
    print(f"warning: could not load style.css ({css_err}); using default styling")
    custom_css = ""
# Set the Coqui terms-of-service flag unless the environment already defines it.
os.environ.setdefault("COQUI_TOS_AGREED", "1")

# Identifier of the model handed to TTS() in get_tts().
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# (display label, language code) pairs, in the order shown in the dropdown.
LANGS = [
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
    ("Italian", "it"),
    ("Portuguese", "pt"),
    ("Polish", "pl"),
    ("Turkish", "tr"),
    ("Russian", "ru"),
    ("Dutch", "nl"),
    ("Czech", "cs"),
    ("Arabic", "ar"),
    ("Chinese (Simplified)", "zh-cn"),
    ("Hungarian", "hu"),
    ("Korean", "ko"),
    ("Japanese", "ja"),
    ("Hindi", "hi"),
]

# Dropdown labels, and the label -> code lookup used when synthesizing.
LANG_LABELS = [pair[0] for pair in LANGS]
LANG_MAP = dict(LANGS)

# Lazily created TTS instance; populated on the first get_tts() call.
_tts = None
def get_tts():
    """Return the shared TTS instance, constructing it on first use.

    torch and TTS are imported here rather than at module import time. If
    torch cannot be imported or queried, the model is built without GPU.
    """
    global _tts
    if _tts is None:
        try:
            import torch

            # Best effort: cap the torch thread count at 1..4 based on the
            # reported CPU count; a failure here is deliberately ignored.
            try:
                torch.set_num_threads(max(1, min(4, os.cpu_count() or 2)))
            except Exception:
                pass
            cuda_ok = torch.cuda.is_available()
        except Exception:
            cuda_ok = False

        from TTS.api import TTS

        # Retry without the `gpu` keyword if this TTS constructor rejects it.
        try:
            _tts = TTS(MODEL_NAME, gpu=cuda_ok)
        except TypeError:
            _tts = TTS(MODEL_NAME)
    return _tts
def clean_text(t: str) -> str:
    """Return *t* with every whitespace run collapsed to one space and the ends trimmed.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    if not t:
        return ""
    # str.split() with no separator already discards leading/trailing whitespace.
    words = t.split()
    return " ".join(words)
def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
    """Synthesize *txt* to *out_path* via ``tts.tts_to_file``.

    The call is first made with a ``speed`` keyword; if that raises
    ``TypeError`` it is repeated once without ``speed``.
    """
    common = {
        "text": txt,
        "file_path": out_path,
        "speaker_wav": wav_path,
        "language": lang,
    }
    try:
        tts.tts_to_file(**common, speed=speed)
    except TypeError:
        tts.tts_to_file(**common)
def safe_filename(seed_text: str, lang_code: str) -> str:
    """Build a download file name of the form ``<base>_<lang_code>_<timestamp>.wav``.

    ``<base>`` is the first 40 characters of the whitespace-normalized
    *seed_text* with every run of characters outside ``[A-Za-z0-9_-]``
    replaced by ``_`` and leading/trailing underscores removed. The
    timestamp is local time formatted as ``%Y%m%d-%H%M%S``.
    """
    base = clean_text(seed_text)[:40]
    base = re.sub(r"[^A-Za-z0-9_-]+", "_", base).strip("_")
    # Apply the fallback AFTER sanitizing: text with no ASCII letters/digits
    # (e.g. Chinese, Arabic, Hindi) sanitizes to an empty string, which would
    # otherwise produce a name starting with a bare underscore.
    if not base:
        base = "Vclone"
    ts = time.strftime("%Y%m%d-%H%M%S")
    return f"{base}_{lang_code}_{ts}.wav"
def _split_into_chunks(text, soft_limit=220, hard_limit=200):
    """Split *text* into sentence-based chunks for synthesis.

    Sentences are cut after sentence-ending punctuation followed by
    whitespace. A sentence longer than *soft_limit* characters is wrapped
    into pieces of at most *hard_limit* characters, breaking at whitespace
    where possible so words are not cut in half; a single run longer than
    *hard_limit* with no whitespace is still hard-split.
    """
    import textwrap  # local import keeps this block self-contained

    sentences = [s.strip() for s in re.split(r'(?<=[.!?؟۔]|[\u0964\u0965])\s+', text) if s.strip()]
    chunks = []
    for sentence in sentences:
        if len(sentence) <= soft_limit:
            chunks.append(sentence)
        else:
            chunks.extend(
                textwrap.wrap(
                    sentence,
                    width=hard_limit,
                    break_long_words=True,
                    break_on_hyphens=False,
                )
            )
    return chunks


# NOTE: the gr.Progress default is evaluated once at definition time, as in the
# original signature; it is kept unchanged for interface compatibility.
def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
    """Synthesize *text* in the voice of *ref_audio* and write it to WAV files.

    Args:
        text: Text to speak; whitespace is normalized with ``clean_text``.
        ref_audio: Path of the reference voice recording, or ``None``.
        lang_label: Display label looked up in ``LANG_MAP``; unknown labels
            fall back to ``"en"``.
        speed: Speed value forwarded to ``synth_to_file_safe``.
        split_sentences: When true, the text is split into chunks that are
            synthesized separately and concatenated.
        progress: Callable used to report per-chunk progress.

    Returns:
        ``(preview_path, download_path)``. Both name WAV files with the same
        audio; ``download_path`` carries a readable file name and equals
        ``preview_path`` if the copy to that name fails.

    Raises:
        gr.Error: If no reference audio is given, the text is empty, or the
            text exceeds 1400 characters while splitting is disabled.
    """
    if ref_audio is None:
        raise gr.Error("Upload a reference voice (10–60s, clean speech).")
    text = clean_text(text)
    if not text:
        raise gr.Error("Please enter some text.")
    if len(text) > 1400 and not split_sentences:
        raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")

    lang = LANG_MAP.get(lang_label, "en")
    chunks = _split_into_chunks(text) if split_sentences else [text]

    tts = get_tts()
    parts = []
    # Per-chunk files are only needed until they are read back into memory.
    with tempfile.TemporaryDirectory() as td:
        total = max(len(chunks), 1)
        for i, chunk in enumerate(chunks, 1):
            progress((i - 1) / total, desc=f"Synthesizing {i}/{total}")
            part_path = os.path.join(td, f"part_{i}.wav")
            synth_to_file_safe(tts, chunk, part_path, ref_audio, lang, speed)
            parts.append(sf.read(part_path))

    # The sample rate of the first part is used for the combined output.
    final_data, sr = parts[0]
    if len(parts) > 1:
        final_data = np.concatenate([data for data, _ in parts], axis=0)

    # delete=False: the file must outlive this function so it can be served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ntf:
        preview_path = ntf.name
    sf.write(preview_path, final_data, sr)

    # Copy to a readable name in the system temp directory for downloading.
    pretty_path = os.path.join(tempfile.gettempdir(), safe_filename(text, lang))
    try:
        shutil.copyfile(preview_path, pretty_path)
    except OSError:
        # Best effort: offer the anonymous temp file for download instead.
        return preview_path, preview_path
    return preview_path, pretty_path
# Single-column UI: inputs, the Generate button, then the audio preview and
# the download button that the click handler fills in.
with gr.Blocks(
    title="VClone - Voice Cloning & TTS",
    css=custom_css,
    analytics_enabled=False,
) as demo:
    with gr.Column(elem_id="wrap"):
        gr.Markdown("## Vclone.vip — Professional Realistic AI Voice Cloning in Seconds")
        gr.Markdown(
            "Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
            "On free CPU, keep text short or enable **Auto split** for speed."
        )
        ref_audio = gr.Audio(
            label="Reference Voice (WAV/MP3)",
            type="filepath",
            elem_id="ref",
        )
        language = gr.Dropdown(
            choices=LANG_LABELS,
            value="English",
            label="Language",
            elem_id="lang",
        )
        text = gr.Textbox(
            label="Text",
            lines=6,
            placeholder="Type or paste your text here…",
            elem_id="txt",
        )
        speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
        split = gr.Checkbox(
            value=True,
            label="Auto split long text by sentence",
            elem_id="split",
        )
        submit = gr.Button("Generate", variant="primary", elem_id="gen")
        output = gr.Audio(
            label="Cloned Speech",
            type="filepath",
            interactive=False,
            elem_id="out_audio",
        )
        download = gr.DownloadButton(label="Download audio", elem_id="dl")

    def run_and_return(text, ref_audio, language, speed, split):
        """Run tts_clone and return the preview path plus a download-button update."""
        audio_path, dl_path = tts_clone(text, ref_audio, language, speed, split)
        # Point the button at the file just written and show its name in the label.
        button_label = f"Download ({os.path.basename(dl_path)})"
        return audio_path, gr.update(value=dl_path, label=button_label)

    submit.click(
        run_and_return,
        inputs=[text, ref_audio, language, speed, split],
        outputs=[output, download],
    )
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", "7860"))
    try:
        demo.queue().launch(server_name="0.0.0.0", server_port=port, show_error=True, show_api=True)
    except TypeError:
        # Retrying with identical keyword arguments would raise the same
        # TypeError, so the fallback drops the optional `show_api` flag
        # (and, as before, launches without the queue).
        demo.launch(server_name="0.0.0.0", server_port=port, show_error=True)