Spaces:

tonyassi
/

voice-clone

Running on Zero

App Files Files Community

Create app.py

#52

by DSGSJ - opened Mar 25

base: refs/heads/main

←

from: refs/pr/52

Discussion Files changed

+50

-32

Files changed (1) hide show

app.py +50 -32

app.py CHANGED Viewed

@@ -1,41 +1,59 @@
-import spaces
 import gradio as gr
-import torch
-from TTS.api import TTS
 import os
-os.environ["COQUI_TOS_AGREED"] = "1"
-device = "cuda"
-tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
-@spaces.GPU(enable_queue=True)
-def clone(text, audio):
-    tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path="./output.wav")
-    return "./output.wav"
-iface = gr.Interface(fn=clone,
-                     inputs=[gr.Textbox(label='Text'),gr.Audio(type='filepath', label='Voice reference audio file')],
-                     outputs=gr.Audio(type='filepath'),
-                     title='Voice Clone',
-                     description="""
-                     by [Tony Assi](https://www.tonyassi.com/)
-                     ---
-                     <h3>If you like voice clone then try <a href="https://huggingface.co/spaces/tonyassi/video-face-swap" target="_blank" rel="noopener noreferrer">Video Face Swap</a></h3>
-                     ---
-                     This space uses xtts_v2 model. Non-commercial use only. [Coqui Public Model License](https://huggingface.co/coqui/XTTS-v2/blob/main/LICENSE.txt)
-                     Please ❤️ this Space. [Email me](mailto:tony.assi.media@gmail.com).
-                     """,
-                     theme = gr.themes.Base(primary_hue="teal",secondary_hue="teal",neutral_hue="slate"),
-                     examples=[["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.","./audio/Wizard-of-Oz-Dorthy.wav"],
-                               ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.","./audio/Godfather.wav"],
-                               ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.","./audio/Paris-Hilton.mp3"],
-                               ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.","./audio/Megan-Fox.mp3"],
-                               ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.","./audio/Jeff-Goldblum.mp3"],
-                               ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.","./audio/Heath-Ledger.mp3"],])
-iface.launch()

 import gradio as gr
+import tempfile
 import os
+import asyncio
+try:
+    import edge_tts
+    EDGE_TTS_AVAILABLE = True
+except ImportError:
+    EDGE_TTS_AVAILABLE = False
# Default (female) Edge neural voice for each supported UI locale code.
VOICES = {
    "EN-US": "en-US-AriaNeural",
    "ES-ES": "es-ES-ElviraNeural",
    "ZH-CN": "zh-CN-XiaoxiaoNeural",
    "JA-JP": "ja-JP-NanamiNeural",
    "FR-FR": "fr-FR-DeniseNeural",
    "DE-DE": "de-DE-KatjaNeural",
}

# Male Edge neural voice for each locale. Kept key-for-key in sync with
# VOICES so that selecting "Male" never silently falls back to the female
# voice for a locale that is offered in the language dropdown.
MALE_VOICES = {
    "EN-US": "en-US-GuyNeural",
    "ES-ES": "es-ES-AlvaroNeural",
    "ZH-CN": "zh-CN-YunxiNeural",
    "JA-JP": "ja-JP-KeitaNeural",
    "FR-FR": "fr-FR-HenriNeural",
    "DE-DE": "de-DE-ConradNeural",
}
async def generate_speech(text, voice, rate="+0%", pitch="+0Hz"):
    """Synthesize *text* with edge-tts and return the path of the MP3 written.

    Args:
        text: The text to speak.
        voice: Edge voice short name, e.g. ``"en-US-AriaNeural"``.
        rate: Signed speaking-rate offset passed through to edge-tts
            (e.g. ``"+10%"``).
        pitch: Signed pitch offset passed through to edge-tts
            (e.g. ``"-20Hz"``).

    Returns:
        Path of a newly created temporary ``.mp3`` file. The caller owns the
        file; it is not deleted automatically.

    Raises:
        Whatever ``edge_tts.Communicate`` / ``Communicate.save`` raise. The
        temporary file is removed before the exception propagates.
    """
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate, pitch=pitch)

    # tempfile.mktemp() only returns a name and is deprecated because another
    # process can claim that name before we write to it. NamedTemporaryFile
    # creates the file atomically; delete=False keeps it after the handle is
    # closed so edge-tts can reopen it by path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        output_path = tmp.name

    try:
        await communicate.save(output_path)
    except BaseException:
        # Includes task cancellation: do not leave an empty/partial file behind.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise
    return output_path
def process_tts(text, language="EN-US", gender="Female", speed=50, pitch=50):
    """Gradio handler: turn the form values into speech.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected.
        language: Locale key into ``VOICES`` / ``MALE_VOICES``.
        gender: ``"Male"`` selects from ``MALE_VOICES`` (falling back to
            ``VOICES`` when the locale has no male entry); anything else
            selects from ``VOICES``.
        speed: Slider value where 50 is neutral; mapped to a rate offset of
            ``speed - 50`` percent.
        pitch: Slider value where 50 is neutral; mapped to a pitch offset of
            ``(pitch - 50) * 2`` Hz.

    Returns:
        ``(audio_path, status_message)``. ``audio_path`` is ``None`` whenever
        synthesis did not happen; the reason is in ``status_message``.
    """
    if not text or not text.strip():
        return None, "Please enter text"
    if not EDGE_TTS_AVAILABLE:
        # Without this check the failure surfaces as an opaque NameError.
        return None, "Error: edge-tts is not installed"

    voice = VOICES.get(language)
    if gender == "Male":
        voice = MALE_VOICES.get(language, voice)
    if voice is None:
        return None, f"Error: unsupported language {language!r}"

    # The UI may deliver slider values as floats; coerce to int so the
    # offsets are always formatted as signed integers ("+0%", "-20Hz")
    # and never as "+0.0%".
    rate = f"{int(round(speed)) - 50:+d}%"
    pitch_str = f"{(int(round(pitch)) - 50) * 2:+d}Hz"

    try:
        output_path = asyncio.run(generate_speech(text, voice, rate, pitch_str))
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"Error: {str(e)}"
    return output_path, f"Success! Voice: {voice}"
# Build the single-page UI. Components render in the order they are created,
# so the sequence below is also the top-to-bottom page layout.
with gr.Blocks(title="Voice API", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Voice Synthesis API")

    # Text to be spoken.
    text = gr.Textbox(label="Text", lines=4)

    # Voice selection: locale and gender side by side.
    with gr.Row():
        lang = gr.Dropdown(
            choices=list(VOICES),
            value="EN-US",
            label="Language",
        )
        gender = gr.Radio(
            choices=["Female", "Male"],
            value="Female",
            label="Gender",
        )

    # Prosody controls: both sliders run 0-100 and start at 50.
    with gr.Row():
        speed = gr.Slider(minimum=0, maximum=100, value=50, label="Speed")
        pitch = gr.Slider(minimum=0, maximum=100, value=50, label="Pitch")

    btn = gr.Button("Generate", variant="primary")

    # Result area: the generated audio and a status line.
    audio = gr.Audio(label="Output")
    status = gr.Textbox(label="Status")

    # Pass the form values to process_tts and show its (audio, status) result.
    btn.click(
        fn=process_tts,
        inputs=[text, lang, gender, speed, pitch],
        outputs=[audio, status],
    )

# Start the Gradio server.
demo.launch()