ashishkblink committed on
Commit
f150896
·
0 Parent(s):

Fix Gradio compatibility: Remove unsupported 'info' parameter from Audio and Dropdown components

Browse files
Files changed (3) hide show
  1. README.md +62 -0
  2. app.py +248 -0
  3. requirements.txt +9 -0
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Vakya TTS Playground
3
+ emoji: 🎤
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # 🎤 Vakya TTS Playground
14
+
15
+ **India's No. 1 TTS Model for Hindi and Other Indian Languages**
16
+
17
+ Interactive playground to test and experience the power of Vakya TTS - a state-of-the-art Text-to-Speech model fine-tuned from XTTS-v2, specifically optimized for Hindi and other Indian languages.
18
+
19
+ ## 🎯 Features
20
+
21
+ - **High-quality Hindi TTS** - Optimized specifically for Hindi pronunciation and intonation
22
+ - **Multi-Indian Language Support** - Supports 10 languages: 9 Indian languages plus English
23
+ - **Voice Cloning** - Clone voices from just 6 seconds of audio
24
+ - **Real-time Synthesis** - Fast and efficient speech generation
25
+ - **Natural Sounding** - Human-like voice quality
26
+
27
+ ## 🚀 How to Use
28
+
29
+ 1. **Enter Text**: Type or paste your text in the text box
30
+ 2. **Select Language**: Choose from Hindi, English, Marathi, Telugu, Tamil, Kannada, Gujarati, Punjabi, Bengali, or Urdu
31
+ 3. **Upload Speaker Audio (Optional)**: Upload a 6+ second audio file to clone the voice
32
+ 4. **Generate**: Click "Generate Speech" and enjoy the output!
33
+
34
+ ## 📊 Supported Languages
35
+
36
+ - Hindi (hi) - Primary focus
37
+ - English (en)
38
+ - Marathi (mr)
39
+ - Telugu (te)
40
+ - Tamil (ta)
41
+ - Kannada (kn)
42
+ - Gujarati (gu)
43
+ - Punjabi (pa)
44
+ - Bengali (bn)
45
+ - Urdu (ur)
46
+
47
+ ## 🔗 Model Repository
48
+
49
+ The model is available at: [ashishkblink/vakya](https://huggingface.co/ashishkblink/vakya)
50
+
51
+ ## 📄 License
52
+
53
+ Apache 2.0
54
+
55
+ ## 👤 Author
56
+
57
+ ashishkblink
58
+
59
+ ---
60
+
61
+ *Built with ❤️ for the Indian language community*
62
+
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vakya TTS - Hugging Face Space Playground
3
+ India's No. 1 TTS Model for Hindi and Other Indian Languages
4
+ """
5
+
6
+ import gradio as gr
7
+ from TTS.api import TTS
8
+ import os
9
+ import tempfile
10
+ from pathlib import Path
11
+
12
# Model identifier handed to the TTS constructor
MODEL_NAME = "ashishkblink/vakya"

# Load the model once at import time. If loading fails, `tts` is set to None
# so the module still imports and the failure is only printed to the log.
print("🚀 Loading Vakya TTS model...")
try:
    tts = TTS(model_name=MODEL_NAME)
    print("✅ Model loaded successfully!")
except Exception as load_error:
    print(f"❌ Error loading model: {load_error}")
    tts = None
22
+
23
# Display name shown in the UI -> language code passed to the model.
# Insertion order matters: it is the order of the dropdown choices and of the
# example buttons.
INDIAN_LANGUAGES = dict(
    Hindi="hi",
    English="en",
    Marathi="mr",
    Telugu="te",
    Tamil="ta",
    Kannada="kn",
    Gujarati="gu",
    Punjabi="pa",
    Bengali="bn",
    Urdu="ur",
)
36
+
37
# Example sentence for each language, keyed by language code (the values of
# INDIAN_LANGUAGES). Used as the initial textbox value, by the example
# buttons, and when the language dropdown changes.
EXAMPLE_TEXTS = {
    "hi": "नमस्ते, यह वाक्य TTS मॉडल है। यह भारत का नंबर एक टेक्स्ट-टू-स्पीच मॉडल है।",
    "en": "Hello, this is the Vakya TTS model. It is India's number one text-to-speech model.",
    "mr": "नमस्कार, हे वाक्य TTS मॉडेल आहे. हे भारतातील नंबर वन टेक्स्ट-टू-स्पीच मॉडेल आहे.",
    "te": "నమస్కారం, ఇది వాక్య TTS మోడల్. ఇది భారతదేశంలోని నంబర్ వన్ టెక్స్ట్-టు-స్పీచ్ మోడల్.",
    "ta": "வணக்கம், இது வாக்கிய TTS மாதிரி. இது இந்தியாவின் நம்பர் ஒன் டெக்ஸ்ட்-டு-ஸ்பீச் மாதிரி.",
    "kn": "ನಮಸ್ಕಾರ, ಇದು ವಾಕ್ಯ TTS ಮಾದರಿ. ಇದು ಭಾರತದ ನಂಬರ್ ವನ್ ಟೆಕ್ಸ್ಟ್-ಟು-ಸ್ಪೀಚ್ ಮಾದರಿ.",
    "gu": "નમસ્તે, આ વાક્ય TTS મોડલ છે. આ ભારતનું નંબર વન ટેક્સ્ટ-ટુ-સ્પીચ મોડલ છે.",
    "pa": "ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਇਹ ਵਾਕ TTS ਮਾਡਲ ਹੈ। ਇਹ ਭਾਰਤ ਦਾ ਨੰਬਰ ਵਨ ਟੈਕਸਟ-ਟੂ-ਸਪੀਚ ਮਾਡਲ ਹੈ।",
    "bn": "নমস্কার, এটি বাক্য TTS মডেল। এটি ভারতের নম্বর ওয়ান টেক্সট-টু-স্পিচ মডেল।",
    "ur": "السلام علیکم، یہ واکیہ TTS ماڈل ہے۔ یہ بھارت کا نمبر ایک ٹیکسٹ-ٹو-اسپیچ ماڈل ہے۔",
}
50
+
51
def synthesize_speech(text, language, speaker_audio):
    """
    Synthesize speech from text using the Vakya TTS model.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected.
        language: Display name of the language (a key of INDIAN_LANGUAGES).
            Unknown names fall back to Hindi ("hi").
        speaker_audio: Path to a reference audio file for voice cloning, or
            None / empty when no reference was supplied.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the generated WAV file, or None when nothing was generated (model
        not loaded, empty text, or synthesis error).
    """
    if tts is None:
        return None, "❌ Model not loaded. Please check the logs."

    if not text or not text.strip():
        return None, "⚠️ Please enter some text to synthesize."

    # Get language code
    lang_code = INDIAN_LANGUAGES.get(language, "hi")

    # Normalise "no reference audio" (None or empty value) to None.
    # NOTE(review): whether the model can synthesize without a reference
    # voice cannot be verified from this file — confirm against the TTS API.
    speaker_wav = speaker_audio or None

    # Reserve an output path. delete=False keeps the file on disk after the
    # handle is closed so the path can be returned to the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        output_path = tmp_file.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=lang_code,
            file_path=output_path,
        )
        return output_path, "✅ Speech generated successfully! 🎉"

    except Exception as e:
        error_msg = f"❌ Error generating speech: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()

        # The reserved temp file is never handed to the caller on failure, so
        # remove it here; otherwise every failed request leaks a file.
        try:
            os.remove(output_path)
        except OSError:
            # Best-effort cleanup: the original error is what the user needs.
            pass

        return None, error_msg
96
+
97
# Custom CSS passed to gr.Blocks(css=...). It sets the container font and
# styles the `.header` banner (gradient background, centered white text) and
# its <h1>/<p> children.
css = """
.gradio-container {
font-family: 'Inter', sans-serif;
}
.header {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 10px;
margin-bottom: 20px;
}
.header h1 {
margin: 0;
font-size: 2.5em;
}
.header p {
margin: 10px 0 0 0;
font-size: 1.2em;
opacity: 0.9;
}
"""
120
+
121
# Create Gradio interface: a header, usage notes, an input column (text,
# language, optional speaker audio, generate button, status) next to an output
# column (generated audio, tips), example buttons, a footer, and event wiring.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# flattened diff view that carried no indentation — confirm against app.py.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Banner; the "header" class is styled by the custom CSS
    gr.HTML("""
<div class="header">
<h1>🎤 Vakya TTS</h1>
<p>India's No. 1 TTS Model for Hindi and Other Indian Languages</p>
</div>
""")

    gr.Markdown("""
### Welcome to Vakya TTS Playground! 🚀

**Test the power of India's premier Text-to-Speech model:**

- 🎯 **High-quality Hindi TTS** - Optimized for Hindi pronunciation
- 🌍 **Multi-Indian Language Support** - Supports 10+ Indian languages
- 🎭 **Voice Cloning** - Clone voices from just 6 seconds of audio
- ⚡ **Real-time Synthesis** - Fast and efficient speech generation

**How to use:**
1. Enter your text in the text box
2. Select the language (Hindi, English, Marathi, Telugu, Tamil, etc.)
3. (Optional) Upload a speaker reference audio file for voice cloning
4. Click "Generate Speech" and enjoy! 🎉
""")

    with gr.Row():
        # Input column
        with gr.Column(scale=1):
            # Pre-filled with the Hindi example to match the default language
            text_input = gr.Textbox(
                label="📝 Enter Text",
                placeholder="Type your text here... (e.g., नमस्ते, यह वाक्य TTS मॉडल है)",
                lines=5,
                value=EXAMPLE_TEXTS["hi"]
            )

            # Choices are the display names; synthesize_speech maps them to codes
            language_dropdown = gr.Dropdown(
                label="🌍 Select Language",
                choices=list(INDIAN_LANGUAGES.keys()),
                value="Hindi"
            )

            # type="filepath": the handler receives a file path, not raw samples
            speaker_audio = gr.Audio(
                label="🎤 Speaker Reference Audio (Optional)",
                type="filepath"
            )
            gr.Markdown("*Upload a 6+ second audio file to clone the voice. Leave empty for default voice.*")

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

            # Read-only field that shows the message returned by synthesize_speech
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
                value="Ready to generate speech!"
            )

        # Output column
        with gr.Column(scale=1):
            output_audio = gr.Audio(
                label="🔊 Generated Speech",
                type="filepath"
            )

            gr.Markdown("""
### 💡 Tips:
- For best results in Hindi, use Devanagari script (नमस्ते)
- Speaker audio should be clear and at least 6 seconds long
- You can download the generated audio by clicking the download button
""")

    # Examples section
    gr.Markdown("### 📚 Example Texts (Click to use)")

    # First row: buttons for the first five languages
    with gr.Row():
        for lang_name, lang_code in list(INDIAN_LANGUAGES.items())[:5]:
            example_text = EXAMPLE_TEXTS.get(lang_code, "")
            # Default arguments bind the current loop values, so each button
            # keeps its own text and language instead of the last iteration's.
            gr.Button(
                f"{lang_name} Example",
                size="sm"
            ).click(
                fn=lambda txt=example_text, lang=lang_name: (txt, lang),
                outputs=[text_input, language_dropdown]
            )

    # Second row: buttons for the remaining languages
    with gr.Row():
        for lang_name, lang_code in list(INDIAN_LANGUAGES.items())[5:]:
            example_text = EXAMPLE_TEXTS.get(lang_code, "")
            gr.Button(
                f"{lang_name} Example",
                size="sm"
            ).click(
                fn=lambda txt=example_text, lang=lang_name: (txt, lang),
                outputs=[text_input, language_dropdown]
            )

    # Footer
    gr.Markdown("""
---
### 🔗 Links
- **Model Repository**: [ashishkblink/vakya](https://huggingface.co/ashishkblink/vakya)
- **Built with**: [Coqui TTS](https://github.com/coqui-ai/TTS)

### 📄 License
Apache 2.0

*Built with ❤️ for the Indian language community*
""")

    # Connect the generate button: the returned (audio_path, status) pair
    # fills the output player and the status box.
    generate_btn.click(
        fn=synthesize_speech,
        inputs=[text_input, language_dropdown, speaker_audio],
        outputs=[output_audio, status_text]
    )

    # Auto-load example when language changes. This replaces whatever is
    # currently in the text box with that language's example sentence.
    language_dropdown.change(
        fn=lambda lang: EXAMPLE_TEXTS.get(INDIAN_LANGUAGES.get(lang, "hi"), ""),
        inputs=[language_dropdown],
        outputs=[text_input]
    )
240
+
241
# Start the web server only when this file is run as a script
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
248
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ TTS>=0.22.0
2
+ gradio>=4.0.0
3
+ torch>=2.0.0
4
+ torchaudio>=2.0.0
5
+ numpy>=1.21.0
6
+ scipy>=1.7.0
7
+ librosa>=0.9.0
8
+ soundfile>=0.10.0
9
+