crackuser committed on
Commit
986aa2a
·
verified ·
1 Parent(s): 1bdf03a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -49
app.py CHANGED
@@ -1,65 +1,185 @@
1
  import gradio as gr
2
  import torch
3
- import torchaudio
4
- import numpy as np
5
- from transformers import AutoModel, AutoTokenizer
6
- import tempfile
7
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- def clone_voice(reference_audio, input_text):
10
- """Voice cloning function"""
 
 
 
 
 
 
 
 
 
 
 
11
  try:
12
- # Your voice cloning logic here
13
- # This is a basic template - replace with your actual model
14
-
15
- # Load your model (replace with actual model loading)
16
- # model = AutoModel.from_pretrained("your-model-name")
17
-
18
- # Process the reference audio
19
- if reference_audio is None:
20
- return None, "Please upload reference audio"
21
 
22
- # Simple echo for testing (replace with actual voice cloning)
23
- # In a real implementation, you'd:
24
- # 1. Process reference_audio to extract voice features
25
- # 2. Generate speech from input_text using those features
26
- # 3. Return the generated audio
27
 
28
- # For now, return the reference audio as a test
29
- return reference_audio, "Voice cloning completed (test mode)"
 
 
 
 
 
 
30
 
 
 
 
 
 
 
31
  except Exception as e:
32
- return None, f"Error: {str(e)}"
 
 
 
 
 
 
 
 
33
 
34
  # Create Gradio interface
35
- with gr.Blocks(title="Voice Cloning") as app:
36
- gr.Markdown("# 🎭 AI Voice Cloning")
37
- gr.Markdown("Upload reference audio and enter text to clone the voice.")
38
-
39
- with gr.Row():
40
- with gr.Column():
41
- reference_audio = gr.Audio(
42
- label="Reference Voice (10+ seconds)",
43
- type="filepath"
44
- )
45
- input_text = gr.Textbox(
46
- label="Text to Convert",
47
- placeholder="Enter the text you want to speak in the cloned voice...",
48
- lines=3
49
- )
50
- clone_btn = gr.Button("🎀 Clone Voice", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- with gr.Column():
53
- output_audio = gr.Audio(label="Cloned Voice Output")
54
- status_text = gr.Textbox(label="Status", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # Connect the function
57
- clone_btn.click(
58
- fn=clone_voice,
59
- inputs=[reference_audio, input_text],
60
- outputs=[output_audio, status_text]
61
- )
62
 
63
  # Launch the app
64
  if __name__ == "__main__":
65
- app.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
+ from TTS.api import TTS
 
 
 
4
  import os
5
+ import tempfile
6
+ import soundfile as sf
7
+
8
# Pre-set the Coqui terms-of-service flag before the model is constructed.
# NOTE(review): presumably this suppresses the interactive licence prompt
# during the XTTS download - confirm against the Coqui TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the model once at import time. On failure `tts` is set to None so
# that request handlers can report the problem instead of crashing the app.
try:
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
except Exception as e:
    print(f"❌ Error loading model: {e}")
    tts = None
else:
    # Only the load itself is guarded; the success message lives in `else`.
    print("✅ XTTS v2 model loaded successfully!")
22
 
23
def _discard_file(path):
    """Best-effort removal of a temporary file; missing files are ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error that is being reported.
        pass


def clone_voice(text, reference_audio, language="en"):
    """
    Clone voice using XTTS v2 model

    Args:
        text: Text to synthesise. Must be non-blank and at most 500
            characters long.
        reference_audio: Path of the reference recording passed to the model
            as ``speaker_wav``.
        language: Language code forwarded to the model. Defaults to ``"en"``,
            the value that was previously hard-coded.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the generated WAV file on success and ``None`` on any failure;
        ``status_message`` is a user-facing description of the outcome.
    """
    if not text or not text.strip():
        return None, "❌ Please enter some text to convert!"

    if not reference_audio:
        return None, "❌ Please upload a reference audio file!"

    if tts is None:
        return None, "❌ TTS model not loaded properly!"

    # Validate text length before any file is allocated
    if len(text) > 500:
        return None, "❌ Text too long! Please keep it under 500 characters."

    output_path = None
    try:
        # Reserve a path for the result. delete=False keeps the file alive
        # after the handle closes so it can be returned to the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            output_path = tmp_file.name

        # Generate cloned voice
        print(f"🎀 Cloning voice for text: {text[:50]}...")
        tts.tts_to_file(
            text=text,
            speaker_wav=reference_audio,
            language=language,
            file_path=output_path,
        )

        # Verify output file exists and has content
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            ellipsis = "..." if len(text) > 100 else ""
            return (
                output_path,
                f"✅ Voice cloning successful!\n🎡 Generated audio for: '{text[:100]}{ellipsis}'",
            )

        # Nothing usable was written: do not leave the empty file behind.
        _discard_file(output_path)
        return None, "❌ Failed to generate audio file!"

    except Exception as e:
        # The temp file was created with delete=False, so it has to be
        # removed explicitly when synthesis fails.
        _discard_file(output_path)

        error_msg = str(e)
        print(f"❌ Voice cloning error: {error_msg}")

        if "CUDA" in error_msg:
            return None, "❌ GPU memory error! Try with shorter text or restart the space."
        if "audio" in error_msg.lower():
            return None, "❌ Audio processing error! Please upload a clear WAV or MP3 file."
        return None, f"❌ Error: {error_msg}"
70
 
71
# Create Gradio interface
def create_interface():
    """
    Build the voice-cloning Gradio UI.

    The layout is a two-column row (reference audio, text box and button on
    the left; generated audio and status on the right), followed by example
    texts and a collapsible help section. Both the button click and pressing
    Enter in the text box call ``clone_voice``.

    Returns:
        The constructed ``gr.Blocks`` app, not yet launched.
    """
    section_heading = "<h3 style='color: #2E86AB;'>{}</h3>"

    with gr.Blocks(
        title="🎭 Voice Cloning Studio",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green"),
    ) as demo:

        # Header
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1 style="color: #2E86AB; margin-bottom: 10px;">🎭 AI Voice Cloning Studio</h1>
            <p style="color: #666; font-size: 18px;">Clone any voice with advanced AI technology</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # Input section
                gr.HTML(section_heading.format("📤 Upload Reference Voice"))
                reference_audio = gr.Audio(
                    label="Reference Audio (10+ seconds recommended)",
                    type="filepath",
                    sources=["upload"],
                )

                gr.HTML(section_heading.format("📝 Enter Text to Clone"))
                text_input = gr.Textbox(
                    label="Text to Convert",
                    placeholder="Enter the text you want to speak in the cloned voice...",
                    lines=4,
                    max_lines=6,
                )

                clone_button = gr.Button(
                    "🎀 Clone Voice",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Output section
                gr.HTML(section_heading.format("🎡 Cloned Voice Output"))
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="filepath",
                )

                status_output = gr.Textbox(
                    label="Status",
                    lines=3,
                    interactive=False,
                )

        # Examples section
        gr.HTML(section_heading.format("💡 Example Texts"))
        examples = [
            "Hello, this is a demonstration of AI voice cloning technology.",
            "Welcome to the future of artificial intelligence and speech synthesis.",
            "This voice was generated using advanced machine learning models.",
            "Experience the power of AI-driven voice generation with natural speech patterns.",
        ]

        gr.Examples(
            examples=examples,
            inputs=text_input,
            label="Click to try these examples:",
        )

        # How it works
        with gr.Accordion("🔍 How It Works", open=False):
            gr.Markdown("""
            ### The Technology
            1. **🎀 Voice Upload**: Upload 10+ seconds of clear speech
            2. **🧠 AI Analysis**: XTTS v2 model analyzes voice characteristics
            3. **📝 Text Input**: Enter the text you want to convert
            4. **🎡 Voice Synthesis**: Generate speech that matches the uploaded voice

            ### Tips for Best Results
            - Use high-quality, clear audio recordings
            - Ensure 10+ seconds of continuous speech
            - Avoid background noise and music
            - Single speaker only in reference audio

            ### Supported Languages
            - English (primary)
            - Spanish, French, German, Italian, Portuguese
            - Chinese, Japanese, Korean
            """)

        # Event handlers: the button and the Enter key share one wiring so
        # the two triggers cannot drift apart.
        synthesis_event = {
            "fn": clone_voice,
            "inputs": [text_input, reference_audio],
            "outputs": [audio_output, status_output],
            "show_progress": True,
        }
        clone_button.click(**synthesis_event)

        # Auto-generate on Enter
        text_input.submit(**synthesis_event)

    return demo
 
 
 
 
 
177
 
178
# Launch the app
if __name__ == "__main__":
    # Listen on every interface on port 7860; no public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)