Files changed (1) hide show
  1. app.py +50 -32
app.py CHANGED
@@ -1,41 +1,59 @@
1
- import spaces
2
  import gradio as gr
3
- import torch
4
- from TTS.api import TTS
5
  import os
6
- os.environ["COQUI_TOS_AGREED"] = "1"
7
 
8
- device = "cuda"
 
 
 
 
9
 
10
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 
 
 
 
 
 
11
 
12
- @spaces.GPU(enable_queue=True)
13
- def clone(text, audio):
14
- tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path="./output.wav")
15
- return "./output.wav"
 
16
 
17
- iface = gr.Interface(fn=clone,
18
- inputs=[gr.Textbox(label='Text'),gr.Audio(type='filepath', label='Voice reference audio file')],
19
- outputs=gr.Audio(type='filepath'),
20
- title='Voice Clone',
21
- description="""
22
- by [Tony Assi](https://www.tonyassi.com/)
23
 
24
- ---
 
 
 
 
 
 
 
 
 
 
25
 
26
- <h3>If you like voice clone then try <a href="https://huggingface.co/spaces/tonyassi/video-face-swap" target="_blank" rel="noopener noreferrer">Video Face Swap</a></h3>
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- ---
29
-
30
- This space uses xtts_v2 model. Non-commercial use only. [Coqui Public Model License](https://huggingface.co/coqui/XTTS-v2/blob/main/LICENSE.txt)
31
-
32
- Please ❤️ this Space. [Email me](mailto:tony.assi.media@gmail.com).
33
- """,
34
- theme = gr.themes.Base(primary_hue="teal",secondary_hue="teal",neutral_hue="slate"),
35
- examples=[["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.","./audio/Wizard-of-Oz-Dorthy.wav"],
36
- ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.","./audio/Godfather.wav"],
37
- ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.","./audio/Paris-Hilton.mp3"],
38
- ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.","./audio/Megan-Fox.mp3"],
39
- ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.","./audio/Jeff-Goldblum.mp3"],
40
- ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.","./audio/Heath-Ledger.mp3"],])
41
- iface.launch()
 
 
1
  import gradio as gr
2
+ import tempfile
 
3
  import os
4
+ import asyncio
5
 
6
+ try:
7
+ import edge_tts
8
+ EDGE_TTS_AVAILABLE = True
9
+ except ImportError:
10
+ EDGE_TTS_AVAILABLE = False
11
 
12
# Default (female) edge-tts voice for each language code offered in the UI.
VOICES = {
    "EN-US": "en-US-AriaNeural",
    "ES-ES": "es-ES-ElviraNeural",
    "ZH-CN": "zh-CN-XiaoxiaoNeural",
    "JA-JP": "ja-JP-NanamiNeural",
    "FR-FR": "fr-FR-DeniseNeural",
    "DE-DE": "de-DE-KatjaNeural",
}

# Male counterpart for each language code. Every key in VOICES has an entry
# here so that selecting "Male" never silently yields a female voice.
MALE_VOICES = {
    "EN-US": "en-US-GuyNeural",
    "ES-ES": "es-ES-AlvaroNeural",
    "ZH-CN": "zh-CN-YunxiNeural",
    "JA-JP": "ja-JP-KeitaNeural",
    "FR-FR": "fr-FR-HenriNeural",
    "DE-DE": "de-DE-ConradNeural",
}
26
 
27
async def generate_speech(text, voice, rate="+0%", pitch="+0Hz"):
    """Synthesize ``text`` with edge-tts and return the path of the MP3 written.

    Args:
        text: Text to speak.
        voice: edge-tts voice name, e.g. ``"en-US-AriaNeural"``.
        rate: Signed percentage offset string, e.g. ``"+0%"`` or ``"-25%"``.
        pitch: Signed Hertz offset string, e.g. ``"+0Hz"`` or ``"-20Hz"``.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted here; the caller owns it.

    Raises:
        RuntimeError: If the ``edge_tts`` package could not be imported.
        Exception: Anything raised by edge-tts while building the request or
            saving the audio is propagated unchanged.
    """
    if not EDGE_TTS_AVAILABLE:
        # Without this check the failure would be an opaque NameError.
        raise RuntimeError("edge-tts is not installed; run `pip install edge-tts`")

    # Build the request first so argument validation errors do not leave a
    # stray temp file behind.
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate, pitch=pitch)

    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        await communicate.save(output_path)
    except BaseException:
        # Do not leak an empty/partial file when synthesis fails or is cancelled.
        try:
            os.unlink(output_path)
        except OSError:
            pass
        raise
    return output_path
 
32
 
33
def process_tts(text, language="EN-US", gender="Female", speed=50, pitch=50):
    """Gradio callback: turn UI inputs into an audio file path and a status line.

    Args:
        text: Text to synthesize. ``None``, empty or whitespace-only input is
            rejected with a prompt.
        language: Key into ``VOICES`` / ``MALE_VOICES`` (e.g. ``"EN-US"``).
        gender: ``"Male"`` selects from ``MALE_VOICES``, falling back to the
            default voice when the language has no entry there; any other
            value selects from ``VOICES``.
        speed: Slider value where 50 is neutral; mapped to ``speed - 50`` percent.
        pitch: Slider value where 50 is neutral; mapped to ``(pitch - 50) * 2`` Hz.

    Returns:
        ``(path, message)`` on success, or ``(None, message)`` when the input
        is rejected or synthesis fails. This function does not raise for
        synthesis errors; they are reported through the message.
    """
    # Guard against None as well as blank text (e.g. direct API calls).
    if not text or not text.strip():
        return None, "Please enter text"

    default_voice = VOICES.get(language)
    if default_voice is None:
        # Report this clearly instead of handing voice=None to edge-tts.
        return None, f"Error: unsupported language '{language}'"
    if gender == "Male":
        voice = MALE_VOICES.get(language, default_voice)
    else:
        voice = default_voice

    try:
        # Sliders may deliver floats; round so the offsets are whole numbers
        # (a value like "+0.0%" is presumably rejected by edge-tts). The "+d"
        # format always emits an explicit sign, including "+0".
        rate = f"{round(speed) - 50:+d}%"
        pitch_str = f"{(round(pitch) - 50) * 2:+d}Hz"
        output_path = asyncio.run(generate_speech(text, voice, rate, pitch_str))
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of crashing.
        return None, f"Error: {e}"
    return output_path, f"Success! Voice: {voice}"
44
 
45
# Gradio UI: a text box, voice controls, and a button wired to process_tts.
with gr.Blocks(title="Voice API", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Voice Synthesis API")

    # Input text.
    text = gr.Textbox(label="Text", lines=4)

    # Voice selection: language code and gender.
    with gr.Row():
        lang = gr.Dropdown(choices=list(VOICES), value="EN-US", label="Language")
        gender = gr.Radio(choices=["Female", "Male"], value="Female", label="Gender")

    # Prosody sliders; 50 is the neutral midpoint for both.
    with gr.Row():
        speed = gr.Slider(minimum=0, maximum=100, value=50, label="Speed")
        pitch = gr.Slider(minimum=0, maximum=100, value=50, label="Pitch")

    btn = gr.Button("Generate", variant="primary")

    # Outputs: the synthesized audio and a status message.
    audio = gr.Audio(label="Output")
    status = gr.Textbox(label="Status")

    btn.click(
        fn=process_tts,
        inputs=[text, lang, gender, speed, pitch],
        outputs=[audio, status],
    )

demo.launch()