import gradio as gr import torch import numpy as np import soundfile as sf import os import tempfile import logging from pathlib import Path # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Global variable to store TTS model tts_model = None model_loaded = False def load_tts_model(): """Load the TTS model with multiple fallback methods""" global tts_model, model_loaded if model_loaded: return True try: # Method 1: Try loading from Hugging Face Hub try: from TTS.api import TTS from huggingface_hub import hf_hub_download model_repo = "SYSPIN/vits_Chhattisgarhi_Female" logger.info(f"Attempting to load model from {model_repo}...") # Download model files from HF model_path = hf_hub_download( repo_id=model_repo, filename="best_model.pth", cache_dir="./model_cache" ) config_path = hf_hub_download( repo_id=model_repo, filename="config.json", cache_dir="./model_cache" ) # Initialize TTS with downloaded files tts_model = TTS(model_path=model_path, config_path=config_path) model_loaded = True logger.info("✅ Model loaded successfully from Hugging Face Hub!") return True except ImportError: logger.warning("huggingface_hub not available, trying local files...") except Exception as e: logger.warning(f"Failed to load from HF Hub: {e}") # Method 2: Try loading from local files (if uploaded to space or cloned) local_paths = [ ("./best_model.pth", "./config.json"), # Current directory ("./model/best_model.pth", "./model/config.json"), # Model subdirectory ("../best_model.pth", "../config.json"), # Parent directory ] for model_path, config_path in local_paths: if os.path.exists(model_path) and os.path.exists(config_path): logger.info(f"Found local model files at {model_path}") from TTS.api import TTS tts_model = TTS(model_path=model_path, config_path=config_path) model_loaded = True logger.info("✅ Model loaded successfully from local files!") return True # Method 3: Try to use a generic VITS model as fallback logger.warning("Custom model not found, trying generic VITS model...") try: from TTS.api import TTS # Use a generic multilingual model as fallback tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2") model_loaded = True logger.info("✅ Loaded fallback multilingual model") return True except Exception as e: logger.error(f"Failed to load fallback model: {e}") return False except Exception as e: logger.error(f"Critical error loading model: {str(e)}") return False def generate_speech(text, speed=1.0): """Generate speech from text""" global tts_model, model_loaded if not text.strip(): return None, "⚠️ Please enter some text to synthesize." # Try to load model if not already loaded if not model_loaded: success = load_tts_model() if not success: return None, "❌ Error: Could not load any TTS model. Please check the setup." try: logger.info(f"Synthesizing: {text[:50]}...") # Create temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file: output_path = tmp_file.name # Generate speech - handle different TTS API versions try: # Method for custom models tts_model.tts_to_file( text=text, file_path=output_path, speed=speed ) except TypeError: # Fallback for models that don't support speed parameter try: tts_model.tts_to_file(text=text, file_path=output_path) except Exception: # For XTTS and other models that need different parameters tts_model.tts_to_file( text=text, file_path=output_path, speaker_wav=None, # Use default speaker language="hi" # Hindi as closest language ) # Check if file was created and has content if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: return None, "❌ Error: Audio file was not generated properly." # Read audio data audio_data, sample_rate = sf.read(output_path) # Clean up os.unlink(output_path) if len(audio_data) == 0: return None, "❌ Error: Generated audio is empty." logger.info("✅ Speech generated successfully!") return (sample_rate, audio_data), "✅ Speech generated successfully!" except Exception as e: error_msg = f"❌ Error during synthesis: {str(e)}" logger.error(error_msg) return None, error_msg # Sample texts examples = [ ["नमस्कार, का हाल बा?", 1.0], ["आज मोसम बहुत बढ़िया हे।", 1.0], ["तुमन कइसे हव?", 0.9], ["धन्यवाद।", 1.1], ["Hello, how are you?", 1.0] # English fallback for testing ] # Create Gradio interface with gr.Blocks( title="Chhattisgarhi TTS", theme=gr.themes.Default(primary_hue="blue") ) as demo: gr.HTML("""
Generate natural Chhattisgarhi speech with AI
Powered by SySpin & Coqui TTS