crackuser committed on
Commit
19173b4
·
verified ·
1 Parent(s): fd59512

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -166
app.py CHANGED
@@ -1,185 +1,82 @@
1
  import gradio as gr
2
  import torch
3
- from TTS.api import TTS
4
- import os
5
- import tempfile
6
  import soundfile as sf
 
 
7
 
8
- # Set environment variable for Coqui TOS
9
- os.environ["COQUI_TOS_AGREED"] = "1"
10
-
11
- # Initialize device
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- print(f"Using device: {device}")
14
-
15
- # Initialize TTS model
16
- try:
17
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
18
- print("✅ XTTS v2 model loaded successfully!")
19
- except Exception as e:
20
- print(f"❌ Error loading model: {e}")
21
- tts = None
22
-
23
- def clone_voice(text, reference_audio):
24
  """
25
- Clone voice using XTTS v2 model
26
  """
27
- if not text or not text.strip():
28
- return None, "❌ Please enter some text to convert!"
29
-
30
- if not reference_audio:
31
- return None, "❌ Please upload a reference audio file!"
32
-
33
- if tts is None:
34
- return None, "❌ TTS model not loaded properly!"
35
-
36
  try:
37
- # Validate text length
38
- if len(text) > 500:
39
- return None, "❌ Text too long! Please keep it under 500 characters."
40
 
41
- # Create temporary output file
42
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
43
- output_path = tmp_file.name
44
 
45
- # Generate cloned voice
46
- print(f"🎤 Cloning voice for text: {text[:50]}...")
47
- tts.tts_to_file(
48
- text=text,
49
- speaker_wav=reference_audio,
50
- language="en",
51
- file_path=output_path
52
- )
53
 
54
- # Verify output file exists and has content
55
- if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
56
- return output_path, f"✅ Voice cloning successful!\n🎵 Generated audio for: '{text[:100]}{'...' if len(text) > 100 else ''}'"
57
- else:
58
- return None, "❌ Failed to generate audio file!"
59
-
60
- except Exception as e:
61
- error_msg = str(e)
62
- print(f"❌ Voice cloning error: {error_msg}")
63
 
64
- if "CUDA" in error_msg:
65
- return None, "❌ GPU memory error! Try with shorter text or restart the space."
66
- elif "audio" in error_msg.lower():
67
- return None, "❌ Audio processing error! Please upload a clear WAV or MP3 file."
68
- else:
69
- return None, f"❌ Error: {error_msg}"
70
 
71
  # Create Gradio interface
72
- def create_interface():
73
- with gr.Blocks(
74
- title="🎭 Voice Cloning Studio",
75
- theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green")
76
- ) as demo:
77
-
78
- # Header
79
- gr.HTML("""
80
- <div style="text-align: center; padding: 20px;">
81
- <h1 style="color: #2E86AB; margin-bottom: 10px;">🎭 AI Voice Cloning Studio</h1>
82
- <p style="color: #666; font-size: 18px;">Clone any voice with advanced AI technology</p>
83
- </div>
84
- """)
85
-
86
- with gr.Row():
87
- with gr.Column(scale=1):
88
- # Input section
89
- gr.HTML("<h3 style='color: #2E86AB;'>📤 Upload Reference Voice</h3>")
90
- reference_audio = gr.Audio(
91
- label="Reference Audio (10+ seconds recommended)",
92
- type="filepath",
93
- sources=["upload"]
94
- )
95
-
96
- gr.HTML("<h3 style='color: #2E86AB;'>📝 Enter Text to Clone</h3>")
97
- text_input = gr.Textbox(
98
- label="Text to Convert",
99
- placeholder="Enter the text you want to speak in the cloned voice...",
100
- lines=4,
101
- max_lines=6
102
- )
103
-
104
- clone_button = gr.Button(
105
- "🎤 Clone Voice",
106
- variant="primary",
107
- size="lg"
108
- )
109
-
110
- with gr.Column(scale=1):
111
- # Output section
112
- gr.HTML("<h3 style='color: #2E86AB;'>🎵 Cloned Voice Output</h3>")
113
- audio_output = gr.Audio(
114
- label="Generated Audio",
115
- type="filepath"
116
- )
117
-
118
- status_output = gr.Textbox(
119
- label="Status",
120
- lines=3,
121
- interactive=False
122
- )
123
-
124
- # Examples section
125
- gr.HTML("<h3 style='color: #2E86AB;'>💡 Example Texts</h3>")
126
- examples = [
127
- "Hello, this is a demonstration of AI voice cloning technology.",
128
- "Welcome to the future of artificial intelligence and speech synthesis.",
129
- "This voice was generated using advanced machine learning models.",
130
- "Experience the power of AI-driven voice generation with natural speech patterns."
131
- ]
132
-
133
- gr.Examples(
134
- examples=examples,
135
- inputs=text_input,
136
- label="Click to try these examples:"
137
- )
138
-
139
- # How it works
140
- with gr.Accordion("🔍 How It Works", open=False):
141
- gr.Markdown("""
142
- ### The Technology
143
- 1. **🎤 Voice Upload**: Upload 10+ seconds of clear speech
144
- 2. **🧠 AI Analysis**: XTTS v2 model analyzes voice characteristics
145
- 3. **📝 Text Input**: Enter the text you want to convert
146
- 4. **🎵 Voice Synthesis**: Generate speech that matches the uploaded voice
147
 
148
- ### Tips for Best Results
149
- - Use high-quality, clear audio recordings
150
- - Ensure 10+ seconds of continuous speech
151
- - Avoid background noise and music
152
- - Single speaker only in reference audio
 
153
 
154
- ### Supported Languages
155
- - English (primary)
156
- - Spanish, French, German, Italian, Portuguese
157
- - Chinese, Japanese, Korean
158
- """)
159
 
160
- # Event handlers
161
- clone_button.click(
162
- fn=clone_voice,
163
- inputs=[text_input, reference_audio],
164
- outputs=[audio_output, status_output],
165
- show_progress=True
166
- )
167
-
168
- # Auto-generate on Enter
169
- text_input.submit(
170
- fn=clone_voice,
171
- inputs=[text_input, reference_audio],
172
- outputs=[audio_output, status_output],
173
- show_progress=True
174
- )
 
175
 
176
- return demo
 
 
 
 
 
177
 
178
- # Launch the app
179
  if __name__ == "__main__":
180
- demo = create_interface()
181
- demo.launch(
182
- server_name="0.0.0.0",
183
- server_port=7860,
184
- share=False
185
- )
 
1
  import gradio as gr
2
  import torch
3
+ import numpy as np
 
 
4
  import soundfile as sf
5
+ import tempfile
6
+ import os
7
 
8
+ def voice_clone_demo(reference_audio, input_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  """
10
+ Demo voice cloning function
11
  """
 
 
 
 
 
 
 
 
 
12
  try:
13
+ if not reference_audio:
14
+ return None, "❌ Please upload reference audio!"
 
15
 
16
+ if not input_text or not input_text.strip():
17
+ return None, "❌ Please enter text to convert!"
 
18
 
19
+ # For demo purposes, return the reference audio
20
+ # In production, this would call actual voice cloning APIs
 
 
 
 
 
 
21
 
22
+ return reference_audio, f"✅ Demo: Would clone '{input_text[:50]}...' using uploaded voice"
 
 
 
 
 
 
 
 
23
 
24
+ except Exception as e:
25
+ return None, f"❌ Error: {str(e)}"
 
 
 
 
26
 
27
  # Create Gradio interface
28
+ with gr.Blocks(
29
+ title="🎭 Voice Cloning Studio",
30
+ theme=gr.themes.Soft(primary_hue="blue")
31
+ ) as demo:
32
+
33
+ gr.HTML("""
34
+ <div style="text-align: center; padding: 20px;">
35
+ <h1 style="color: #2E86AB;">🎭 AI Voice Cloning Studio</h1>
36
+ <p style="color: #666; font-size: 18px;">Clone any voice with AI technology</p>
37
+ </div>
38
+ """)
39
+
40
+ with gr.Row():
41
+ with gr.Column():
42
+ gr.HTML("<h3>📤 Upload Reference Voice</h3>")
43
+ reference_audio = gr.Audio(
44
+ label="Reference Audio (10+ seconds)",
45
+ type="filepath"
46
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ gr.HTML("<h3>📝 Enter Text</h3>")
49
+ text_input = gr.Textbox(
50
+ label="Text to Convert",
51
+ placeholder="Enter text to speak in the cloned voice...",
52
+ lines=4
53
+ )
54
 
55
+ clone_button = gr.Button("🎤 Clone Voice", variant="primary")
 
 
 
 
56
 
57
+ with gr.Column():
58
+ gr.HTML("<h3>🎵 Output</h3>")
59
+ audio_output = gr.Audio(label="Cloned Voice")
60
+ status_output = gr.Textbox(label="Status", interactive=False)
61
+
62
+ # Examples
63
+ examples = [
64
+ "Hello, this is a demonstration of voice cloning technology.",
65
+ "Welcome to the future of AI-powered speech synthesis.",
66
+ "This voice was generated using advanced machine learning."
67
+ ]
68
+
69
+ gr.Examples(
70
+ examples=examples,
71
+ inputs=text_input
72
+ )
73
 
74
+ # Event handler
75
+ clone_button.click(
76
+ fn=voice_clone_demo,
77
+ inputs=[reference_audio, text_input],
78
+ outputs=[audio_output, status_output]
79
+ )
80
 
 
81
  if __name__ == "__main__":
82
+ demo.launch()