Spaces:

sameerbanchhor
/

ChattisVani

Sleeping

App Files Files Community

sameerbanchhor commited on Jun 23, 2025

Commit

d2cdbdb

verified ·

1 Parent(s): ea0c6d8

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +12 -8
app.py +266 -0
requirements.txt +16 -0

README.md CHANGED Viewed

@@ -1,10 +1,14 @@
 ---
-license: mit
-title: ChattisVani
-sdk: docker
-emoji: 🏆
 colorFrom: red
-colorTo: gray
-pinned: true
-short_description: a cg app
----

 ---
+title: Chattisgarh Speech
+emoji: 🐢
 colorFrom: red
+colorTo: blue
+sdk: gradio
+sdk_version: 5.34.2
+app_file: app.py
+pinned: false
+license: apache-2.0
+short_description: a chattisgarh tts model demo
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,266 @@

+import gradio as gr
+import torch
+import numpy as np
+import soundfile as sf
+import os
+import tempfile
+import logging
+from pathlib import Path
+# Configure root logging at INFO level and create this module's logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Module-level cache: the loaded TTS object, plus a flag that load_tts_model() sets once loading has succeeded
+tts_model = None
+model_loaded = False
+def load_tts_model():
+    """Load the TTS model with multiple fallback methods"""
+    global tts_model, model_loaded
+    if model_loaded:
+        return True
+    try:
+        # Method 1: Try loading from Hugging Face Hub
+        try:
+            from TTS.api import TTS
+            from huggingface_hub import hf_hub_download
+            model_repo = "SYSPIN/vits_Chhattisgarhi_Female"
+            logger.info(f"Attempting to load model from {model_repo}...")
+            # Download model files from HF
+            model_path = hf_hub_download(
+                repo_id=model_repo,
+                filename="best_model.pth",
+                cache_dir="./model_cache"
+            )
+            config_path = hf_hub_download(
+                repo_id=model_repo,
+                filename="config.json",
+                cache_dir="./model_cache"
+            )
+            # Initialize TTS with downloaded files
+            tts_model = TTS(model_path=model_path, config_path=config_path)
+            model_loaded = True
+            logger.info("✅ Model loaded successfully from Hugging Face Hub!")
+            return True
+        except ImportError:
+            logger.warning("huggingface_hub not available, trying local files...")
+        except Exception as e:
+            logger.warning(f"Failed to load from HF Hub: {e}")
+        # Method 2: Try loading from local files (if uploaded to space or cloned)
+        local_paths = [
+            ("./best_model.pth", "./config.json"),  # Current directory
+            ("./model/best_model.pth", "./model/config.json"),  # Model subdirectory
+            ("../best_model.pth", "../config.json"),  # Parent directory
+        ]
+        for model_path, config_path in local_paths:
+            if os.path.exists(model_path) and os.path.exists(config_path):
+                logger.info(f"Found local model files at {model_path}")
+                from TTS.api import TTS
+                tts_model = TTS(model_path=model_path, config_path=config_path)
+                model_loaded = True
+                logger.info("✅ Model loaded successfully from local files!")
+                return True
+        # Method 3: Try to use a generic VITS model as fallback
+        logger.warning("Custom model not found, trying generic VITS model...")
+        try:
+            from TTS.api import TTS
+            # Use a generic multilingual model as fallback
+            tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
+            model_loaded = True
+            logger.info("✅ Loaded fallback multilingual model")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to load fallback model: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"Critical error loading model: {str(e)}")
+        return False
+def generate_speech(text, speed=1.0):
+    """Generate speech from text"""
+    global tts_model, model_loaded
+    if not text.strip():
+        return None, "⚠️ Please enter some text to synthesize."
+    # Try to load model if not already loaded
+    if not model_loaded:
+        success = load_tts_model()
+        if not success:
+            return None, "❌ Error: Could not load any TTS model. Please check the setup."
+    try:
+        logger.info(f"Synthesizing: {text[:50]}...")
+        # Create temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+            output_path = tmp_file.name
+        # Generate speech - handle different TTS API versions
+        try:
+            # Method for custom models
+            tts_model.tts_to_file(
+                text=text,
+                file_path=output_path,
+                speed=speed
+            )
+        except TypeError:
+            # Fallback for models that don't support speed parameter
+            try:
+                tts_model.tts_to_file(text=text, file_path=output_path)
+            except Exception:
+                # For XTTS and other models that need different parameters
+                tts_model.tts_to_file(
+                    text=text,
+                    file_path=output_path,
+                    speaker_wav=None,  # Use default speaker
+                    language="hi"  # Hindi as closest language
+                )
+        # Check if file was created and has content
+        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
+            return None, "❌ Error: Audio file was not generated properly."
+        # Read audio data
+        audio_data, sample_rate = sf.read(output_path)
+        # Clean up
+        os.unlink(output_path)
+        if len(audio_data) == 0:
+            return None, "❌ Error: Generated audio is empty."
+        logger.info("✅ Speech generated successfully!")
+        return (sample_rate, audio_data), "✅ Speech generated successfully!"
+    except Exception as e:
+        error_msg = f"❌ Error during synthesis: {str(e)}"
+        logger.error(error_msg)
+        return None, error_msg
+# Sample inputs as [text, speed] rows; used below for the quick-example buttons and the gr.Examples table
+examples = [
+    ["नमस्कार, का हाल बा?", 1.0],
+    ["आज मोसम बहुत बढ़िया हे।", 1.0],
+    ["तुमन कइसे हव?", 0.9],
+    ["धन्यवाद।", 1.1],
+    ["Hello, how are you?", 1.0]  # English fallback for testing
+]
+# Create Gradio interface
+with gr.Blocks(
+    title="Chhattisgarhi TTS",
+    theme=gr.themes.Default(primary_hue="blue")
+) as demo:
+    gr.HTML("""
+    <div style="text-align: center; margin: 20px 0;">
+        <h1>🗣️ Chhattisgarhi Text-to-Speech</h1>
+        <p style="color: #666;">Generate natural Chhattisgarhi speech with AI</p>
+        <p style="color: #888; font-size: 0.9em;">Powered by SySpin & Coqui TTS</p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=2):
+            text_input = gr.Textbox(
+                label="📝 Enter Text",
+                placeholder="छत्तीसगढ़ी में अपना टेक्स्ट लिखें... (Enter Chhattisgarhi text here)",
+                lines=3,
+                max_lines=6
+            )
+            speed_slider = gr.Slider(
+                minimum=0.5,
+                maximum=2.0,
+                value=1.0,
+                step=0.1,
+                label="🎚️ Speech Speed",
+                info="Adjust speaking rate (may not work with all models)"
+            )
+            generate_btn = gr.Button(
+                "🎵 Generate Speech",
+                variant="primary",
+                size="lg"
+            )
+        with gr.Column(scale=1):
+            gr.Markdown("### Quick Examples")
+            for text, _ in examples:
+                btn = gr.Button(text, size="sm")
+                btn.click(lambda x=text: x, outputs=text_input)
+    with gr.Row():
+        audio_output = gr.Audio(
+            label="🔊 Generated Speech",
+            type="numpy"
+        )
+        status_output = gr.Textbox(
+            label="📊 Status",
+            interactive=False,
+            max_lines=3
+        )
+    gr.Examples(
+        examples=examples,
+        inputs=[text_input, speed_slider],
+        outputs=[audio_output, status_output],
+        fn=generate_speech,
+        cache_examples=False
+    )
+    with gr.Accordion("ℹ️ Model Information", open=False):
+        gr.Markdown("""
+        ### About This Model
+        - **Language**: Chhattisgarhi (छत्तीसगढ़ी)
+        - **Voice Type**: Female
+        - **Training**: SySpin dataset
+        - **Engine**: Coqui TTS
+        ### Model Loading Strategy
+        1. First tries to load the custom Chhattisgarhi model from Hugging Face
+        2. Falls back to local files if available
+        3. Uses a multilingual model as last resort
+        ### How to Use
+        1. Enter your text in Chhattisgarhi (Devanagari script preferred)
+        2. Adjust speech speed if needed (may not work with all models)
+        3. Click "Generate Speech"
+        4. Listen to the generated audio
+        ### Tips
+        - Use proper punctuation for natural pauses
+        - Shorter sentences often work better
+        - If the custom model fails, a fallback model will be used
+        """)
+    # Event binding
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, speed_slider],
+        outputs=[audio_output, status_output]
+    )
+    # Load model on startup
+    demo.load(
+        fn=lambda: "🔄 Loading TTS model..." if not load_tts_model() else "✅ Model ready!",
+        outputs=status_output
+    )
+# Launch the app when run as a script. NOTE(review): share=True requests a public share link - confirm it is still wanted for a Space (README sets sdk: gradio)
+if __name__ == "__main__":
+    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+TTS==0.22.0
+torch>=1.9.0
+torchaudio>=0.9.0
+numpy>=1.21.0
+soundfile>=0.12.1
+librosa>=0.9.2
+scipy>=1.7.0
+pyyaml>=6.0
+matplotlib>=3.5.0
+Pillow>=8.3.0
+numba>=0.56.0
+inflect>=5.6.0
+psutil>=5.8.0
+pynndescent>=0.5.7
+umap-learn>=0.5.3
+huggingface_hub>=0.16.0