talkclone

Build error

App Files Files Community

asbgig committed on Aug 22, 2025

Commit

681b58a

verified ·

1 Parent(s): 13f3113

Create app.py

Browse files

Files changed (1) hide show

app.py +106 -0

app.py ADDED Viewed

	@@ -0,0 +1,106 @@

import os
import re
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from TTS.api import TTS
# Coqui gates the XTTS weights behind its CPML licence and asks for
# confirmation on stdin the first time the model is downloaded. A hosted
# Space has no terminal to answer that prompt, so accept it up front.
# Coqui's model manager checks for the exact value "1" (the previous "y"
# was ignored); setdefault() leaves an operator-supplied value untouched.
os.environ.setdefault("COQUI_TOS_AGREED", "1")

# Model identifier handed to TTS() when the model is first loaded.
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
# The model is created on first use rather than at import time, so importing
# this module does not pay for the heavy model initialisation.
_tts_obj = None


def get_tts():
    """Return the shared TTS model, creating it on the first call.

    The instance is cached in the module-level ``_tts_obj`` so later calls
    reuse it instead of constructing ``TTS(MODEL_NAME)`` again.

    Returns:
        The ``TTS`` instance built from ``MODEL_NAME``.
    """
    global _tts_obj
    if _tts_obj is None:
        _tts_obj = TTS(MODEL_NAME)
    return _tts_obj
# (label shown in the language dropdown, language code passed to the model).
# Urdu ("ur") was removed: it is not one of the languages XTTS v2 supports,
# so selecting it could only ever end in a synthesis error.
LANGS = [
    ("English", "en"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("French", "fr"),
    ("German", "de"),
    ("Spanish", "es"),
    ("Italian", "it"),
    ("Portuguese", "pt"),
    ("Turkish", "tr"),
]
def clean_text(text: str) -> str:
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed. ``None`` (or any other falsy
    value) is treated as the empty string.

    Returns:
        The normalised text, possibly empty.
    """
    if not text:
        return ""
    # str.split() without a separator already discards leading and trailing
    # whitespace, so no separate strip() is needed.
    return " ".join(text.split())
def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
    """Synthesise *txt* to *out_path*, falling back to a call without ``speed``.

    Args:
        tts: Object exposing a ``tts_to_file`` method.
        txt: Text to speak, passed as ``text``.
        out_path: Destination file, passed as ``file_path``.
        wav_path: Reference recording, passed as ``speaker_wav``.
        lang: Language code, passed as ``language``.
        speed: Speaking speed, passed as ``speed`` on the first attempt only.

    The first attempt includes ``speed``. If it raises ``TypeError`` the call
    is repeated once without that argument; an exception from the second
    attempt propagates to the caller.
    """
    common = {
        "text": txt,
        "file_path": out_path,
        "speaker_wav": wav_path,
        "language": lang,
    }
    try:
        tts.tts_to_file(**common, speed=speed)
    except TypeError:
        # NOTE(review): this is meant for backends that reject the `speed`
        # keyword, but any TypeError raised during the first attempt also
        # lands here and triggers the retry.
        tts.tts_to_file(**common)
def tts_clone(text, ref_audio, language_code, speed, split_sentences):
    """Speak *text* using *ref_audio* as the reference voice; return a WAV path.

    Args:
        text: Text to synthesise; whitespace is normalised with
            ``clean_text`` first.
        ref_audio: Path of the reference recording, or ``None`` when nothing
            was uploaded.
        language_code: Language code forwarded to the model.
        speed: Speaking speed forwarded to the model.
        split_sentences: When true, the text is split after ``.``, ``!`` or
            ``?`` followed by whitespace and each piece is synthesised
            separately, then the pieces are concatenated in order.

    Returns:
        Path of a WAV file containing the generated speech. The file is
        created outside the scratch directory so that it still exists after
        this function returns; it is not deleted here.

    Raises:
        gr.Error: If no reference audio or no text was supplied.
    """
    if ref_audio is None:
        raise gr.Error("Please upload a reference voice sample (10–60 seconds).")
    text = clean_text(text)
    if not text:
        raise gr.Error("Please enter some text.")

    if split_sentences:
        pieces = re.split(r"(?<=[.!?])\s+", text)
        chunks = [piece.strip() for piece in pieces if piece.strip()]
    else:
        chunks = [text]

    tts = get_tts()
    parts = []
    sample_rate = None
    # Per-chunk files are intermediate only, so they live in a scratch
    # directory that is removed as soon as they have been read back.
    with tempfile.TemporaryDirectory() as scratch:
        for index, chunk in enumerate(chunks, 1):
            part_path = os.path.join(scratch, f"part_{index}.wav")
            synth_to_file_safe(tts, chunk, part_path, ref_audio, language_code, speed)
            data, rate = sf.read(part_path)
            if sample_rate is None:
                # The first chunk's rate is used for the combined output.
                sample_rate = rate
            parts.append(data)

    final_data = parts[0] if len(parts) == 1 else np.concatenate(parts, axis=0)

    # The result must outlive the scratch directory: writing it inside the
    # `with` block above would hand the caller a path that is already deleted.
    fd, final_path = tempfile.mkstemp(prefix="talkclone_", suffix=".wav")
    os.close(fd)
    sf.write(final_path, final_data, sample_rate)
    return final_path
# Gradio UI. Components are laid out in creation order: the inputs go in the
# first column of the row, the generated audio in the second.
with gr.Blocks(title="TalkClone - Voice Cloning & TTS", css="#warning{border-left:4px solid #22c55e;padding-left:8px;}") as demo:
    gr.Markdown("# TalkClone — Turn Text into Speech using a Reference Audio")
    gr.Markdown(
        "Upload a short, clean **reference voice** (10–60s), pick a **language**, type your **text**, and generate audio. "
        "For best results: no music/background noise, single speaker, 16kHz+ WAV/MP3."
    )
    with gr.Row():
        with gr.Column():
            # type="filepath" hands the callback a path string, which is what
            # tts_clone forwards to the model as the reference recording.
            ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath")
            language = gr.Dropdown(choices=LANGS, value="en", label="Language")
            text = gr.Textbox(label="Text", lines=5, placeholder="Type your text here...")
            speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
            split = gr.Checkbox(value=True, label="Auto split long text by sentence")
            submit = gr.Button("Generate", variant="primary")
            # The "warning" id is the hook for the css rule passed to gr.Blocks.
            gr.Markdown(
                '<div id="warning"><strong>Consent & Safety:</strong> Only clone voices you have explicit permission to use. '
                "Avoid public-figure impersonation and disclose AI-generated audio when required by law.</div>"
            )
        with gr.Column():
            output = gr.Audio(label="Cloned Speech", type="filepath", interactive=False)
            download = gr.File(label="Download audio")

    def run_and_return(text, ref_audio, language, speed, split):
        """Run tts_clone and return its path twice: for the player and the download."""
        out_path = tts_clone(text, ref_audio, language, speed, split)
        return out_path, out_path

    # The order of `inputs` must match run_and_return's parameter order.
    submit.click(run_and_return, inputs=[text, ref_audio, language, speed, split],
                 outputs=[output, download])

if __name__ == "__main__":
    demo.launch()