indicF5

Sleeping

App Files Files Community

ashishkblink committed on Jan 4

Commit

f150896

0 Parent(s):

Fix Gradio compatibility: Remove unsupported 'info' parameter from Audio and Dropdown components

Browse files

Files changed (3) hide show

README.md +62 -0
app.py +248 -0
requirements.txt +9 -0

README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+---
+title: Vakya TTS Playground
+emoji: 🎤
+colorFrom: purple
+colorTo: pink
+sdk: gradio
+sdk_version: 4.0.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+---
+# 🎤 Vakya TTS Playground
+**India's No. 1 TTS Model for Hindi and Other Indian Languages**
+Interactive playground to test and experience the power of Vakya TTS - a state-of-the-art Text-to-Speech model fine-tuned from XTTS-v2, specifically optimized for Hindi and other Indian languages.
+## 🎯 Features
+- **High-quality Hindi TTS** - Optimized specifically for Hindi pronunciation and intonation
+- **Multi-Indian Language Support** - Supports 10+ Indian languages
+- **Voice Cloning** - Clone voices from just 6 seconds of audio
+- **Real-time Synthesis** - Fast and efficient speech generation
+- **Natural Sounding** - Human-like voice quality
+## 🚀 How to Use
+1. **Enter Text**: Type or paste your text in the text box
+2. **Select Language**: Choose from Hindi, English, Marathi, Telugu, Tamil, Kannada, Gujarati, Punjabi, Bengali, or Urdu
+3. **Upload Speaker Audio (Optional)**: Upload a 6+ second audio file to clone the voice
+4. **Generate**: Click "Generate Speech" and enjoy the output!
+## 📊 Supported Languages
+- Hindi (hi) - Primary focus
+- English (en)
+- Marathi (mr)
+- Telugu (te)
+- Tamil (ta)
+- Kannada (kn)
+- Gujarati (gu)
+- Punjabi (pa)
+- Bengali (bn)
+- Urdu (ur)
+## 🔗 Model Repository
+The model is available at: [ashishkblink/vakya](https://huggingface.co/ashishkblink/vakya)
+## 📄 License
+Apache 2.0
+## 👤 Author
+ashishkblink
+---
+*Built with ❤️ for the Indian language community*

app.py ADDED Viewed

	@@ -0,0 +1,248 @@

+"""
+Vakya TTS - Hugging Face Space Playground
+India's No. 1 TTS Model for Hindi and Other Indian Languages
+"""
+import gradio as gr
+from TTS.api import TTS
+import os
+import tempfile
+from pathlib import Path
+# Initialize the TTS model
+# The model is loaded once at import time and shared by every request.
+# A load failure is reported on stdout but is not fatal: `tts` is set to None
+# and synthesize_speech() then returns a "model not loaded" status instead of
+# raising.
+# NOTE(review): MODEL_NAME looks like a Hugging Face repo id; confirm that
+# TTS(model_name=...) accepts this form rather than requiring a Coqui model
+# name or explicit model/config paths.
+MODEL_NAME = "ashishkblink/vakya"
+print("🚀 Loading Vakya TTS model...")
+try:
+    tts = TTS(model_name=MODEL_NAME)
+    print("✅ Model loaded successfully!")
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    tts = None
+# Maps the language name shown in the dropdown to the language code that is
+# passed to the model.
+# NOTE(review): confirm the fine-tuned model accepts every code listed here.
+INDIAN_LANGUAGES = {
+    "Hindi": "hi",
+    "English": "en",
+    "Marathi": "mr",
+    "Telugu": "te",
+    "Tamil": "ta",
+    "Kannada": "kn",
+    "Gujarati": "gu",
+    "Punjabi": "pa",
+    "Bengali": "bn",
+    "Urdu": "ur",
+}
+# Example texts for each language, keyed by language code.
+# Used as the initial textbox value, by the example buttons, and when the
+# selected language changes.
+EXAMPLE_TEXTS = {
+    "hi": "नमस्ते, यह वाक्य TTS मॉडल है। यह भारत का नंबर एक टेक्स्ट-टू-स्पीच मॉडल है।",
+    "en": "Hello, this is the Vakya TTS model. It is India's number one text-to-speech model.",
+    "mr": "नमस्कार, हे वाक्य TTS मॉडेल आहे. हे भारतातील नंबर वन टेक्स्ट-टू-स्पीच मॉडेल आहे.",
+    "te": "నమస్కారం, ఇది వాక్య TTS మోడల్. ఇది భారతదేశంలోని నంబర్ వన్ టెక్స్ట్-టు-స్పీచ్ మోడల్.",
+    "ta": "வணக்கம், இது வாக்கிய TTS மாதிரி. இது இந்தியாவின் நம்பர் ஒன் டெக்ஸ்ட்-டு-ஸ்பீச் மாதிரி.",
+    "kn": "ನಮಸ್ಕಾರ, ಇದು ವಾಕ್ಯ TTS ಮಾದರಿ. ಇದು ಭಾರತದ ನಂಬರ್ ವನ್ ಟೆಕ್ಸ್ಟ್-ಟು-ಸ್ಪೀಚ್ ಮಾದರಿ.",
+    "gu": "નમસ્તે, આ વાક્ય TTS મોડલ છે. આ ભારતનું નંબર વન ટેક્સ્ટ-ટુ-સ્પીચ મોડલ છે.",
+    "pa": "ਸਤ ਸ੍ਰੀ ਅਕਾਲ, ਇਹ ਵਾਕ TTS ਮਾਡਲ ਹੈ। ਇਹ ਭਾਰਤ ਦਾ ਨੰਬਰ ਵਨ ਟੈਕਸਟ-ਟੂ-ਸਪੀਚ ਮਾਡਲ ਹੈ।",
+    "bn": "নমস্কার, এটি বাক্য TTS মডেল। এটি ভারতের নম্বর ওয়ান টেক্সট-টু-স্পিচ মডেল।",
+    "ur": "السلام علیکم، یہ واکیہ TTS ماڈل ہے۔ یہ بھارت کا نمبر ایک ٹیکسٹ-ٹو-اسپیچ ماڈل ہے۔",
+}
+def synthesize_speech(text, language, speaker_audio):
+    """
+    Synthesize speech from text using Vakya TTS model
+    """
+    if tts is None:
+        return None, "❌ Model not loaded. Please check the logs."
+    if not text or not text.strip():
+        return None, "⚠️ Please enter some text to synthesize."
+    # Get language code
+    lang_code = INDIAN_LANGUAGES.get(language, "hi")
+    # Create temporary file for output
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+        output_path = tmp_file.name
+    try:
+        # XTTS requires a speaker_wav for voice cloning
+        # If speaker audio is provided, use it
+        if speaker_audio is not None:
+            speaker_wav = speaker_audio
+        else:
+            # Try to use a default sample from the model
+            # XTTS can work without explicit speaker_wav if using TTS.api
+            # Let's use a simple approach - try with a minimal default
+            speaker_wav = None
+        # Synthesize speech using TTS API
+        # The TTS.api handles the speaker_wav internally if not provided
+        tts.tts_to_file(
+            text=text,
+            speaker_wav=speaker_wav if speaker_wav else None,
+            language=lang_code,
+            file_path=output_path
+        )
+        return output_path, "✅ Speech generated successfully! 🎉"
+    except Exception as e:
+        error_msg = f"❌ Error generating speech: {str(e)}"
+        print(error_msg)
+        import traceback
+        traceback.print_exc()
+        return None, error_msg
+# Custom CSS for better styling
+# Passed to gr.Blocks(css=...). Sets the page font and styles the gradient
+# banner rendered by the <div class="header"> element in the UI.
+css = """
+.gradio-container {
+    font-family: 'Inter', sans-serif;
+}
+.header {
+    text-align: center;
+    padding: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    border-radius: 10px;
+    margin-bottom: 20px;
+}
+.header h1 {
+    margin: 0;
+    font-size: 2.5em;
+}
+.header p {
+    margin: 10px 0 0 0;
+    font-size: 1.2em;
+    opacity: 0.9;
+}
+"""
+# Create Gradio interface
+with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+    gr.HTML("""
+    <div class="header">
+        <h1>🎤 Vakya TTS</h1>
+        <p>India's No. 1 TTS Model for Hindi and Other Indian Languages</p>
+    </div>
+    """)
+    gr.Markdown("""
+    ### Welcome to Vakya TTS Playground! 🚀
+    **Test the power of India's premier Text-to-Speech model:**
+    - 🎯 **High-quality Hindi TTS** - Optimized for Hindi pronunciation
+    - 🌍 **Multi-Indian Language Support** - Supports 10+ Indian languages
+    - 🎭 **Voice Cloning** - Clone voices from just 6 seconds of audio
+    - ⚡ **Real-time Synthesis** - Fast and efficient speech generation
+    **How to use:**
+    1. Enter your text in the text box
+    2. Select the language (Hindi, English, Marathi, Telugu, Tamil, etc.)
+    3. (Optional) Upload a speaker reference audio file for voice cloning
+    4. Click "Generate Speech" and enjoy! 🎉
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            text_input = gr.Textbox(
+                label="📝 Enter Text",
+                placeholder="Type your text here... (e.g., नमस्ते, यह वाक्य TTS मॉडल है)",
+                lines=5,
+                value=EXAMPLE_TEXTS["hi"]
+            )
+            language_dropdown = gr.Dropdown(
+                label="🌍 Select Language",
+                choices=list(INDIAN_LANGUAGES.keys()),
+                value="Hindi"
+            )
+            speaker_audio = gr.Audio(
+                label="🎤 Speaker Reference Audio (Optional)",
+                type="filepath"
+            )
+            gr.Markdown("*Upload a 6+ second audio file to clone the voice. Leave empty for default voice.*")
+            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
+            status_text = gr.Textbox(
+                label="Status",
+                interactive=False,
+                value="Ready to generate speech!"
+            )
+        with gr.Column(scale=1):
+            output_audio = gr.Audio(
+                label="🔊 Generated Speech",
+                type="filepath"
+            )
+            gr.Markdown("""
+            ### 💡 Tips:
+            - For best results in Hindi, use Devanagari script (नमस्ते)
+            - Speaker audio should be clear and at least 6 seconds long
+            - You can download the generated audio by clicking the download button
+            """)
+    # Examples section
+    gr.Markdown("### 📚 Example Texts (Click to use)")
+    with gr.Row():
+        for lang_name, lang_code in list(INDIAN_LANGUAGES.items())[:5]:
+            example_text = EXAMPLE_TEXTS.get(lang_code, "")
+            gr.Button(
+                f"{lang_name} Example",
+                size="sm"
+            ).click(
+                fn=lambda txt=example_text, lang=lang_name: (txt, lang),
+                outputs=[text_input, language_dropdown]
+            )
+    with gr.Row():
+        for lang_name, lang_code in list(INDIAN_LANGUAGES.items())[5:]:
+            example_text = EXAMPLE_TEXTS.get(lang_code, "")
+            gr.Button(
+                f"{lang_name} Example",
+                size="sm"
+            ).click(
+                fn=lambda txt=example_text, lang=lang_name: (txt, lang),
+                outputs=[text_input, language_dropdown]
+            )
+    # Footer
+    gr.Markdown("""
+    ---
+    ### 🔗 Links
+    - **Model Repository**: [ashishkblink/vakya](https://huggingface.co/ashishkblink/vakya)
+    - **Built with**: [Coqui TTS](https://github.com/coqui-ai/TTS)
+    ### 📄 License
+    Apache 2.0
+    *Built with ❤️ for the Indian language community*
+    """)
+    # Connect the generate button
+    generate_btn.click(
+        fn=synthesize_speech,
+        inputs=[text_input, language_dropdown, speaker_audio],
+        outputs=[output_audio, status_text]
+    )
+    # Auto-load example when language changes
+    language_dropdown.change(
+        fn=lambda lang: EXAMPLE_TEXTS.get(INDIAN_LANGUAGES.get(lang, "hi"), ""),
+        inputs=[language_dropdown],
+        outputs=[text_input]
+    )
+# Launch the app
+# Guarded so that importing this module does not start the server.
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",  # listen on all network interfaces
+        server_port=7860,
+        share=False  # do not create a public share link
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+TTS>=0.22.0
+gradio>=4.0.0
+torch>=2.0.0
+torchaudio>=2.0.0
+numpy>=1.21.0
+scipy>=1.7.0
+librosa>=0.9.0
+soundfile>=0.10.0