Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,30 +7,26 @@ import numpy as np
|
|
| 7 |
import re
|
| 8 |
import warnings
|
| 9 |
import os
|
|
|
|
| 10 |
warnings.filterwarnings('ignore')
|
| 11 |
|
| 12 |
-
print("🚀 Starting Enhanced Hindi Speech
|
| 13 |
|
| 14 |
# ============================================
|
| 15 |
# 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
|
| 16 |
# ============================================
|
| 17 |
|
| 18 |
-
# Global variables to store loaded models
|
| 19 |
SENTIMENT_PIPELINE = None
|
| 20 |
ASR_MODEL = None
|
| 21 |
|
| 22 |
def load_models():
|
| 23 |
-
"""
|
| 24 |
-
Load all models once at startup and cache them globally
|
| 25 |
-
"""
|
| 26 |
global SENTIMENT_PIPELINE, ASR_MODEL
|
| 27 |
|
| 28 |
-
# Check if already loaded
|
| 29 |
if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None:
|
| 30 |
print("✅ Models already loaded, skipping...")
|
| 31 |
return
|
| 32 |
|
| 33 |
-
# Load Hindi Sentiment Model
|
| 34 |
print("📚 Loading Hindi sentiment analysis model...")
|
| 35 |
try:
|
| 36 |
sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
|
|
@@ -44,7 +40,6 @@ def load_models():
|
|
| 44 |
print(f"❌ Error loading sentiment model: {e}")
|
| 45 |
raise
|
| 46 |
|
| 47 |
-
# Load Indic Conformer for Hindi ASR
|
| 48 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 49 |
try:
|
| 50 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
@@ -58,67 +53,152 @@ def load_models():
|
|
| 58 |
|
| 59 |
print("✅ All models loaded and cached in memory")
|
| 60 |
|
| 61 |
-
# Load models at startup
|
| 62 |
load_models()
|
| 63 |
|
| 64 |
# ============================================
|
| 65 |
-
# 2.
|
| 66 |
# ============================================
|
| 67 |
|
| 68 |
-
def
|
| 69 |
"""
|
| 70 |
-
|
| 71 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
try:
|
| 73 |
-
# Load audio with torchaudio for better compatibility
|
| 74 |
wav, sr = torchaudio.load(audio_path)
|
| 75 |
|
| 76 |
-
# Convert stereo to mono by averaging channels
|
| 77 |
if wav.shape[0] > 1:
|
| 78 |
wav = torch.mean(wav, dim=0, keepdim=True)
|
| 79 |
print(f"📊 Converted stereo to mono")
|
| 80 |
|
| 81 |
-
# Resample if needed
|
| 82 |
if sr != target_sr:
|
| 83 |
resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
|
| 84 |
wav = resampler(wav)
|
| 85 |
print(f"🔄 Resampled from {sr}Hz to {target_sr}Hz")
|
| 86 |
|
| 87 |
-
# Convert to numpy for processing
|
| 88 |
audio_np = wav.squeeze().numpy()
|
| 89 |
-
|
| 90 |
-
# 1. Remove DC offset (center around zero)
|
| 91 |
audio_np = audio_np - np.mean(audio_np)
|
| 92 |
|
| 93 |
-
|
| 94 |
-
audio_trimmed, trim_indices = librosa.effects.trim(
|
| 95 |
audio_np,
|
| 96 |
-
top_db=25,
|
| 97 |
frame_length=2048,
|
| 98 |
hop_length=512
|
| 99 |
)
|
| 100 |
print(f"✂️ Trimmed {len(audio_np) - len(audio_trimmed)} silent samples")
|
| 101 |
|
| 102 |
-
# 3. Normalize audio amplitude to [-1, 1]
|
| 103 |
audio_normalized = librosa.util.normalize(audio_trimmed)
|
| 104 |
|
| 105 |
-
# 4. Apply pre-emphasis filter (boost high frequencies)
|
| 106 |
pre_emphasis = 0.97
|
| 107 |
audio_emphasized = np.append(
|
| 108 |
audio_normalized[0],
|
| 109 |
audio_normalized[1:] - pre_emphasis * audio_normalized[:-1]
|
| 110 |
)
|
| 111 |
|
| 112 |
-
# 5. Advanced noise reduction
|
| 113 |
audio_denoised = spectral_noise_gate(audio_emphasized, target_sr)
|
| 114 |
-
|
| 115 |
-
# 6. Dynamic range compression (reduce volume spikes)
|
| 116 |
audio_compressed = dynamic_range_compression(audio_denoised)
|
| 117 |
-
|
| 118 |
-
# 7. Final normalization
|
| 119 |
audio_final = librosa.util.normalize(audio_compressed)
|
| 120 |
|
| 121 |
-
# Convert back to torch tensor
|
| 122 |
audio_tensor = torch.from_numpy(audio_final).float().unsqueeze(0)
|
| 123 |
|
| 124 |
print(f"✅ Preprocessing complete: {len(audio_final)/target_sr:.2f}s of audio")
|
|
@@ -130,9 +210,7 @@ def advanced_preprocess_audio(audio_path, target_sr=16000):
|
|
| 130 |
return basic_preprocess_audio(audio_path, target_sr)
|
| 131 |
|
| 132 |
def basic_preprocess_audio(audio_path, target_sr=16000):
|
| 133 |
-
"""
|
| 134 |
-
Fallback basic preprocessing if advanced fails
|
| 135 |
-
"""
|
| 136 |
try:
|
| 137 |
wav, sr = torchaudio.load(audio_path)
|
| 138 |
|
|
@@ -151,26 +229,17 @@ def basic_preprocess_audio(audio_path, target_sr=16000):
|
|
| 151 |
raise
|
| 152 |
|
| 153 |
def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0.6):
|
| 154 |
-
"""
|
| 155 |
-
Advanced spectral noise gating using STFT
|
| 156 |
-
"""
|
| 157 |
try:
|
| 158 |
-
# Compute Short-Time Fourier Transform
|
| 159 |
stft = librosa.stft(audio, n_fft=2048, hop_length=512)
|
| 160 |
magnitude = np.abs(stft)
|
| 161 |
phase = np.angle(stft)
|
| 162 |
|
| 163 |
-
# Estimate noise floor from quietest frames
|
| 164 |
noise_profile = np.percentile(magnitude, noise_floor_percentile, axis=1, keepdims=True)
|
| 165 |
-
|
| 166 |
-
# Create noise gate mask (soft gating)
|
| 167 |
snr = magnitude / (noise_profile + 1e-10)
|
| 168 |
gate = np.minimum(1.0, np.maximum(0.0, (snr - 1.0) / 2.0))
|
| 169 |
-
|
| 170 |
-
# Apply gate with reduction
|
| 171 |
magnitude_gated = magnitude * (gate + (1 - gate) * (1 - reduction_factor))
|
| 172 |
|
| 173 |
-
# Reconstruct signal
|
| 174 |
stft_clean = magnitude_gated * np.exp(1j * phase)
|
| 175 |
audio_clean = librosa.istft(stft_clean, hop_length=512)
|
| 176 |
|
|
@@ -180,15 +249,11 @@ def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0
|
|
| 180 |
return audio
|
| 181 |
|
| 182 |
def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
|
| 183 |
-
"""
|
| 184 |
-
Simple dynamic range compression to reduce volume spikes
|
| 185 |
-
"""
|
| 186 |
try:
|
| 187 |
-
# Find samples above threshold
|
| 188 |
abs_audio = np.abs(audio)
|
| 189 |
above_threshold = abs_audio > threshold
|
| 190 |
|
| 191 |
-
# Apply compression to loud parts
|
| 192 |
compressed = audio.copy()
|
| 193 |
compressed[above_threshold] = np.sign(audio[above_threshold]) * (
|
| 194 |
threshold + (abs_audio[above_threshold] - threshold) / ratio
|
|
@@ -200,21 +265,18 @@ def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
|
|
| 200 |
return audio
|
| 201 |
|
| 202 |
# ============================================
|
| 203 |
-
#
|
| 204 |
# ============================================
|
| 205 |
|
| 206 |
def extract_prosodic_features(audio, sr):
|
| 207 |
-
"""
|
| 208 |
-
Extract prosodic features that indicate emotional state
|
| 209 |
-
"""
|
| 210 |
try:
|
| 211 |
features = {}
|
| 212 |
|
| 213 |
-
# 1. Pitch variation (f0) with improved tracking
|
| 214 |
pitches, magnitudes = librosa.piptrack(
|
| 215 |
y=audio,
|
| 216 |
sr=sr,
|
| 217 |
-
fmin=80,
|
| 218 |
fmax=400
|
| 219 |
)
|
| 220 |
pitch_values = []
|
|
@@ -231,20 +293,16 @@ def extract_prosodic_features(audio, sr):
|
|
| 231 |
else:
|
| 232 |
features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0
|
| 233 |
|
| 234 |
-
# 2. Energy/Intensity
|
| 235 |
rms = librosa.feature.rms(y=audio)[0]
|
| 236 |
features['energy_mean'] = np.mean(rms)
|
| 237 |
features['energy_std'] = np.std(rms)
|
| 238 |
|
| 239 |
-
# 3. Speech rate (zero crossing rate)
|
| 240 |
zcr = librosa.feature.zero_crossing_rate(audio)[0]
|
| 241 |
features['speech_rate'] = np.mean(zcr)
|
| 242 |
|
| 243 |
-
# 4. Spectral features
|
| 244 |
spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
|
| 245 |
features['spectral_centroid_mean'] = np.mean(spectral_centroid)
|
| 246 |
|
| 247 |
-
# 5. Spectral rolloff (brightness)
|
| 248 |
spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
|
| 249 |
features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
|
| 250 |
|
|
@@ -259,18 +317,12 @@ def extract_prosodic_features(audio, sr):
|
|
| 259 |
}
|
| 260 |
|
| 261 |
# ============================================
|
| 262 |
-
#
|
| 263 |
# ============================================
|
| 264 |
|
| 265 |
def validate_hindi_text(text):
|
| 266 |
-
"""
|
| 267 |
-
Validate if text contains Hindi/Devanagari characters
|
| 268 |
-
Supports Hinglish (Hindi + English)
|
| 269 |
-
"""
|
| 270 |
-
# Devanagari Unicode range
|
| 271 |
hindi_pattern = re.compile(r'[\u0900-\u097F]')
|
| 272 |
-
|
| 273 |
-
# Count Hindi characters
|
| 274 |
hindi_chars = len(hindi_pattern.findall(text))
|
| 275 |
total_chars = len(re.findall(r'\S', text))
|
| 276 |
|
|
@@ -279,20 +331,13 @@ def validate_hindi_text(text):
|
|
| 279 |
|
| 280 |
hindi_ratio = hindi_chars / total_chars
|
| 281 |
|
| 282 |
-
# Allow Hinglish (at least 15% Hindi characters - more lenient)
|
| 283 |
if hindi_ratio < 0.15:
|
| 284 |
return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}% Hindi)", hindi_ratio
|
| 285 |
|
| 286 |
return True, "Valid Hindi/Hinglish", hindi_ratio
|
| 287 |
|
| 288 |
-
# ============================================
|
| 289 |
-
# 5. ENHANCED SENTIMENT ANALYSIS
|
| 290 |
-
# ============================================
|
| 291 |
-
|
| 292 |
def detect_negation(text):
|
| 293 |
-
"""
|
| 294 |
-
Detect negation words that might flip sentiment
|
| 295 |
-
"""
|
| 296 |
negation_words = [
|
| 297 |
'नहीं', 'न', 'मत', 'नही', 'ना',
|
| 298 |
'not', 'no', 'never', 'neither', 'nor',
|
|
@@ -306,15 +351,13 @@ def detect_negation(text):
|
|
| 306 |
return False
|
| 307 |
|
| 308 |
def detect_crisis_keywords(text):
|
| 309 |
-
"""
|
| 310 |
-
Detect crisis/emergency keywords that indicate strong negative emotion
|
| 311 |
-
"""
|
| 312 |
crisis_keywords = [
|
| 313 |
-
'बचाओ', 'मदद', 'help', 'save',
|
| 314 |
-
'मार', 'पीट', 'हिंसा', 'beat', 'hit', 'violence',
|
| 315 |
-
'डर', 'खतरा', 'fear', 'danger',
|
| 316 |
-
'मर', 'मौत', 'death', 'die',
|
| 317 |
-
'छोड़', 'leave me', 'stop'
|
| 318 |
]
|
| 319 |
|
| 320 |
text_lower = text.lower()
|
|
@@ -324,15 +367,10 @@ def detect_crisis_keywords(text):
|
|
| 324 |
return False
|
| 325 |
|
| 326 |
def detect_mixed_emotions(text, prosodic_features):
|
| 327 |
-
"""
|
| 328 |
-
Advanced mixed emotion detection using text and audio features
|
| 329 |
-
CRITICAL: Don't mark crisis/distress as mixed emotions
|
| 330 |
-
"""
|
| 331 |
text_lower = text.lower()
|
| 332 |
|
| 333 |
-
# FIRST: Check if this is a crisis situation (never mixed)
|
| 334 |
if detect_crisis_keywords(text):
|
| 335 |
-
print("⚠️ Crisis keywords detected - NOT treating as mixed emotion")
|
| 336 |
return False
|
| 337 |
|
| 338 |
mixed_indicators = [
|
|
@@ -343,32 +381,26 @@ def detect_mixed_emotions(text, prosodic_features):
|
|
| 343 |
'शायद', 'maybe', 'perhaps'
|
| 344 |
]
|
| 345 |
|
| 346 |
-
positive_words = ['खुश', 'प्यार', 'अच्छा', 'बढ़िया', 'मज़ा', 'happy', 'love', 'good', 'nice'
|
| 347 |
-
negative_words = ['दुख', 'रो', 'गुस्सा', 'बुरा', 'परेशान', 'sad', 'cry', 'angry', 'bad', 'upset'
|
| 348 |
|
| 349 |
has_mixed_indicators = any(ind in text_lower for ind in mixed_indicators)
|
| 350 |
has_positive = any(word in text_lower for word in positive_words)
|
| 351 |
has_negative = any(word in text_lower for word in negative_words)
|
| 352 |
|
| 353 |
-
# Only prosodic if both high pitch AND high energy variation
|
| 354 |
-
high_pitch_variation = prosodic_features['pitch_std'] > 35
|
| 355 |
-
high_energy_variation = prosodic_features['energy_std'] > 0.08
|
| 356 |
-
|
| 357 |
-
# Text must have BOTH opposing emotions to be truly mixed
|
| 358 |
text_mixed = has_mixed_indicators and (has_positive and has_negative)
|
| 359 |
-
audio_mixed = high_pitch_variation and high_energy_variation and (has_positive and has_negative)
|
| 360 |
|
| 361 |
-
return text_mixed
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
| 364 |
-
"""
|
| 365 |
-
Enhanced sentiment analysis combining text and prosodic features
|
| 366 |
-
CRITICAL: Properly handle crisis/distress situations
|
| 367 |
-
"""
|
| 368 |
sentiment_scores = {}
|
| 369 |
|
| 370 |
if not raw_results or not isinstance(raw_results, list) or len(raw_results) == 0:
|
| 371 |
-
print("⚠️ Unexpected sentiment results format")
|
| 372 |
return {'Negative': 0.33, 'Neutral': 0.34, 'Positive': 0.33}, 0.34, False
|
| 373 |
|
| 374 |
label_mapping = {
|
|
@@ -390,48 +422,26 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
|
| 390 |
if sentiment not in sentiment_scores:
|
| 391 |
sentiment_scores[sentiment] = 0.0
|
| 392 |
|
| 393 |
-
initial_confidence = max(sentiment_scores.values())
|
| 394 |
-
|
| 395 |
-
# CRITICAL: Check for crisis keywords first
|
| 396 |
is_crisis = detect_crisis_keywords(text)
|
| 397 |
if is_crisis:
|
| 398 |
-
print("🚨 CRISIS DETECTED - Strongly amplifying negative sentiment")
|
| 399 |
-
# Heavily boost negative sentiment for crisis situations
|
| 400 |
sentiment_scores['Negative'] = min(0.95, sentiment_scores['Negative'] * 1.8)
|
| 401 |
sentiment_scores['Neutral'] = max(0.02, sentiment_scores['Neutral'] * 0.2)
|
| 402 |
sentiment_scores['Positive'] = max(0.01, sentiment_scores['Positive'] * 0.1)
|
| 403 |
-
is_mixed = False
|
| 404 |
else:
|
| 405 |
-
# Negation detection (only for non-crisis)
|
| 406 |
has_negation = detect_negation(text)
|
| 407 |
if has_negation:
|
| 408 |
-
print("🔄 Negation detected - adjusting sentiment")
|
| 409 |
temp = sentiment_scores['Positive']
|
| 410 |
sentiment_scores['Positive'] = sentiment_scores['Negative']
|
| 411 |
sentiment_scores['Negative'] = temp
|
| 412 |
|
| 413 |
-
# Mixed emotions (only for non-crisis)
|
| 414 |
is_mixed = detect_mixed_emotions(text, prosodic_features)
|
| 415 |
if is_mixed:
|
| 416 |
-
|
| 417 |
-
neutral_boost = 0.20 # Reduced from 0.25
|
| 418 |
sentiment_scores['Neutral'] = min(0.65, sentiment_scores['Neutral'] + neutral_boost)
|
| 419 |
sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
|
| 420 |
sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
|
| 421 |
-
|
| 422 |
-
# Prosodic adjustments (only for non-crisis)
|
| 423 |
-
if prosodic_features['pitch_std'] > 45 and prosodic_features['energy_mean'] > 0.12:
|
| 424 |
-
print("🎵 Strong emotional prosody detected")
|
| 425 |
-
if sentiment_scores['Positive'] > sentiment_scores['Negative']:
|
| 426 |
-
sentiment_scores['Positive'] = min(0.9, sentiment_scores['Positive'] * 1.2)
|
| 427 |
-
else:
|
| 428 |
-
sentiment_scores['Negative'] = min(0.9, sentiment_scores['Negative'] * 1.2)
|
| 429 |
-
sentiment_scores['Neutral'] = max(0.05, sentiment_scores['Neutral'] * 0.8)
|
| 430 |
-
elif prosodic_features['energy_mean'] < 0.03 and prosodic_features['pitch_std'] < 15:
|
| 431 |
-
print("🎵 Calm/neutral prosody detected")
|
| 432 |
-
sentiment_scores['Neutral'] = min(0.8, sentiment_scores['Neutral'] * 1.2)
|
| 433 |
|
| 434 |
-
# Normalize
|
| 435 |
total = sum(sentiment_scores.values())
|
| 436 |
if total > 0:
|
| 437 |
sentiment_scores = {k: v/total for k, v in sentiment_scores.items()}
|
|
@@ -441,45 +451,41 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
|
| 441 |
return sentiment_scores, final_confidence, is_mixed
|
| 442 |
|
| 443 |
# ============================================
|
| 444 |
-
#
|
| 445 |
# ============================================
|
| 446 |
|
| 447 |
def predict(audio_filepath):
|
| 448 |
-
"""
|
| 449 |
-
Main prediction function with Indic Conformer ASR
|
| 450 |
-
"""
|
| 451 |
try:
|
| 452 |
print(f"\n{'='*60}")
|
| 453 |
print(f"🎧 Processing audio file...")
|
| 454 |
|
| 455 |
if audio_filepath is None:
|
| 456 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
| 458 |
-
#
|
| 459 |
-
# STEP 1: Advanced Audio Preprocessing
|
| 460 |
-
# ============================================
|
| 461 |
print("🔧 Applying advanced audio preprocessing...")
|
| 462 |
try:
|
| 463 |
audio_tensor, sr, audio_np = advanced_preprocess_audio(audio_filepath)
|
| 464 |
prosodic_features = extract_prosodic_features(audio_np, sr)
|
| 465 |
except Exception as e:
|
| 466 |
-
|
| 467 |
-
|
|
|
|
|
|
|
|
|
|
| 468 |
|
| 469 |
-
#
|
| 470 |
-
|
| 471 |
-
# ============================================
|
| 472 |
-
print("🔄 Transcribing with Indic Conformer (CTC & RNNT)...")
|
| 473 |
try:
|
| 474 |
-
# Try RNNT first (usually more accurate)
|
| 475 |
transcription_rnnt = ASR_MODEL(audio_tensor, "hi", "rnnt")
|
| 476 |
-
print(f"📝 RNNT Transcription: '{transcription_rnnt}'")
|
| 477 |
|
| 478 |
-
# Fallback to CTC if RNNT fails or is empty
|
| 479 |
if not transcription_rnnt or len(transcription_rnnt.strip()) < 2:
|
| 480 |
-
print("⚠️ RNNT empty, trying CTC...")
|
| 481 |
transcription_ctc = ASR_MODEL(audio_tensor, "hi", "ctc")
|
| 482 |
-
print(f"📝 CTC Transcription: '{transcription_ctc}'")
|
| 483 |
transcription = transcription_ctc
|
| 484 |
else:
|
| 485 |
transcription = transcription_rnnt
|
|
@@ -487,27 +493,33 @@ def predict(audio_filepath):
|
|
| 487 |
transcription = transcription.strip()
|
| 488 |
|
| 489 |
except Exception as asr_error:
|
| 490 |
-
|
| 491 |
-
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
-
#
|
| 494 |
-
# STEP 3: Validate Transcription
|
| 495 |
-
# ============================================
|
| 496 |
if not transcription or len(transcription) < 2:
|
| 497 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
|
| 500 |
-
print(f"🔍 {validation_msg} ({hindi_ratio*100:.1f}% Hindi)")
|
| 501 |
|
| 502 |
if not is_valid:
|
| 503 |
return {
|
| 504 |
-
"
|
| 505 |
-
"
|
|
|
|
|
|
|
|
|
|
| 506 |
}
|
| 507 |
|
| 508 |
-
#
|
| 509 |
-
# STEP 4: Sentiment Analysis
|
| 510 |
-
# ============================================
|
| 511 |
print("💭 Analyzing sentiment...")
|
| 512 |
try:
|
| 513 |
raw_sentiment = SENTIMENT_PIPELINE(transcription)
|
|
@@ -518,38 +530,68 @@ def predict(audio_filepath):
|
|
| 518 |
raw_sentiment
|
| 519 |
)
|
| 520 |
|
| 521 |
-
#
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
|
| 533 |
-
print(f"
|
| 534 |
-
print(f"
|
| 535 |
-
print(f"🔀 Mixed Emotions: {'Yes' if is_mixed else 'No'}")
|
| 536 |
-
print(f"🌐 Hindi Content: {hindi_ratio*100:.0f}%")
|
| 537 |
print(f"{'='*60}\n")
|
| 538 |
|
| 539 |
-
return
|
| 540 |
|
| 541 |
except Exception as sentiment_error:
|
| 542 |
-
|
| 543 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
except Exception as e:
|
| 546 |
-
print(f"❌ Critical Error: {str(e)}")
|
| 547 |
import traceback
|
| 548 |
traceback.print_exc()
|
| 549 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
|
| 551 |
# ============================================
|
| 552 |
-
#
|
| 553 |
# ============================================
|
| 554 |
|
| 555 |
demo = gr.Interface(
|
|
@@ -559,69 +601,134 @@ demo = gr.Interface(
|
|
| 559 |
label="🎤 Record or Upload Hindi Audio",
|
| 560 |
sources=["upload", "microphone"]
|
| 561 |
),
|
| 562 |
-
outputs=gr.
|
| 563 |
-
|
| 564 |
-
num_top_classes=10
|
| 565 |
-
),
|
| 566 |
-
title="🎤 Advanced Hindi Speech Sentiment Analysis (Indic Conformer)",
|
| 567 |
description="""
|
| 568 |
-
## 🇮🇳
|
| 569 |
|
| 570 |
-
### ✨
|
| 571 |
-
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 572 |
-
- **🧠
|
| 573 |
-
- **🎵
|
| 574 |
-
- **🔄 Mixed Emotion Detection** - Handles complex feelings
|
| 575 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 576 |
-
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
|
| 586 |
### 🧪 Test Examples:
|
| 587 |
-
- **😊
|
| 588 |
-
- **😢
|
| 589 |
-
-
|
| 590 |
-
-
|
| 591 |
-
-
|
| 592 |
-
-
|
| 593 |
-
|
| 594 |
-
###
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
###
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
""",
|
| 614 |
-
examples=None,
|
| 615 |
theme=gr.themes.Soft(),
|
| 616 |
flagging_mode="never",
|
| 617 |
-
|
|
|
|
|
|
|
| 618 |
)
|
| 619 |
|
| 620 |
# ============================================
|
| 621 |
-
#
|
| 622 |
# ============================================
|
| 623 |
|
| 624 |
if __name__ == "__main__":
|
| 625 |
print("🌐 Starting server...")
|
| 626 |
demo.launch()
|
| 627 |
-
print("🎉
|
|
|
|
| 7 |
import re
|
| 8 |
import warnings
|
| 9 |
import os
|
| 10 |
+
import json
|
| 11 |
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
+
print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
|
| 14 |
|
| 15 |
# ============================================
|
| 16 |
# 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
|
| 17 |
# ============================================
|
| 18 |
|
|
|
|
| 19 |
SENTIMENT_PIPELINE = None
|
| 20 |
ASR_MODEL = None
|
| 21 |
|
| 22 |
def load_models():
|
| 23 |
+
"""Load all models once at startup and cache them globally"""
|
|
|
|
|
|
|
| 24 |
global SENTIMENT_PIPELINE, ASR_MODEL
|
| 25 |
|
|
|
|
| 26 |
if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None:
|
| 27 |
print("✅ Models already loaded, skipping...")
|
| 28 |
return
|
| 29 |
|
|
|
|
| 30 |
print("📚 Loading Hindi sentiment analysis model...")
|
| 31 |
try:
|
| 32 |
sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
|
|
|
|
| 40 |
print(f"❌ Error loading sentiment model: {e}")
|
| 41 |
raise
|
| 42 |
|
|
|
|
| 43 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 44 |
try:
|
| 45 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
|
|
| 53 |
|
| 54 |
print("✅ All models loaded and cached in memory")
|
| 55 |
|
|
|
|
| 56 |
load_models()
|
| 57 |
|
| 58 |
# ============================================
|
| 59 |
+
# 2. EMOTION MAPPING
|
| 60 |
# ============================================
|
| 61 |
|
| 62 |
+
def map_sentiment_to_emotion(sentiment_scores, text, prosodic_features, is_mixed):
|
| 63 |
"""
|
| 64 |
+
Map sentiment to specific emotions with confidence
|
| 65 |
"""
|
| 66 |
+
# Get dominant sentiment
|
| 67 |
+
dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
|
| 68 |
+
max_score = sentiment_scores[dominant_sentiment]
|
| 69 |
+
|
| 70 |
+
# Detect crisis/distress
|
| 71 |
+
is_crisis = detect_crisis_keywords(text)
|
| 72 |
+
has_negation = detect_negation(text)
|
| 73 |
+
|
| 74 |
+
# Analyze text for specific emotions
|
| 75 |
+
text_lower = text.lower()
|
| 76 |
+
|
| 77 |
+
# Emotion keyword mapping
|
| 78 |
+
emotion_keywords = {
|
| 79 |
+
'joy': ['खुश', 'प्रसन्न', 'मज़ा', 'आनंद', 'happy', 'joy', 'excited', 'wonderful', 'बढ़िया', 'शानदार'],
|
| 80 |
+
'love': ['प्यार', 'love', 'दिल', 'heart', 'romantic', 'affection', 'स्नेह'],
|
| 81 |
+
'anger': ['गुस्सा', 'क्रोध', 'angry', 'mad', 'furious', 'rage', 'नाराज़'],
|
| 82 |
+
'fear': ['डर', 'भय', 'खतरा', 'fear', 'scared', 'afraid', 'terror', 'panic', 'चिंता'],
|
| 83 |
+
'sadness': ['दुख', 'रो', 'उदास', 'sad', 'cry', 'depressed', 'lonely', 'निराश', 'अकेला'],
|
| 84 |
+
'surprise': ['हैरान', 'आश्चर्य', 'surprise', 'shocked', 'amazed', 'unexpected', 'अचंभा'],
|
| 85 |
+
'disgust': ['घृणा', 'नफरत', 'disgust', 'hate', 'disgusting', 'gross'],
|
| 86 |
+
'anxiety': ['चिंता', 'तनाव', 'परेशान', 'worry', 'anxious', 'stress', 'nervous', 'बेचैन'],
|
| 87 |
+
'confusion': ['समझ नहीं', 'उलझन', 'confus', 'don\'t know', 'पता नहीं', 'क्या करूं'],
|
| 88 |
+
'calm': ['शांत', 'ठीक', 'calm', 'peace', 'okay', 'fine', 'normal', 'सामान्य']
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
# Detect specific emotions from text
|
| 92 |
+
detected_emotions = []
|
| 93 |
+
for emotion, keywords in emotion_keywords.items():
|
| 94 |
+
if any(keyword in text_lower for keyword in keywords):
|
| 95 |
+
detected_emotions.append(emotion)
|
| 96 |
+
|
| 97 |
+
# Prosodic analysis
|
| 98 |
+
high_energy = prosodic_features['energy_mean'] > 0.12
|
| 99 |
+
high_pitch_var = prosodic_features['pitch_std'] > 40
|
| 100 |
+
low_energy = prosodic_features['energy_mean'] < 0.03
|
| 101 |
+
calm_pitch = prosodic_features['pitch_std'] < 15
|
| 102 |
+
|
| 103 |
+
# Determine emotion
|
| 104 |
+
if is_crisis:
|
| 105 |
+
emotion = "fear"
|
| 106 |
+
secondary_emotion = "distress"
|
| 107 |
+
confidence = max(0.85, max_score)
|
| 108 |
+
elif is_mixed:
|
| 109 |
+
if len(detected_emotions) >= 2:
|
| 110 |
+
emotion = detected_emotions[0]
|
| 111 |
+
secondary_emotion = detected_emotions[1]
|
| 112 |
+
elif detected_emotions:
|
| 113 |
+
emotion = detected_emotions[0]
|
| 114 |
+
secondary_emotion = "neutral"
|
| 115 |
+
else:
|
| 116 |
+
emotion = "mixed"
|
| 117 |
+
secondary_emotion = None
|
| 118 |
+
confidence = sentiment_scores['Neutral']
|
| 119 |
+
elif detected_emotions:
|
| 120 |
+
# Use detected emotions
|
| 121 |
+
emotion = detected_emotions[0]
|
| 122 |
+
secondary_emotion = detected_emotions[1] if len(detected_emotions) > 1 else None
|
| 123 |
+
confidence = max_score
|
| 124 |
+
else:
|
| 125 |
+
# Map based on sentiment + prosody
|
| 126 |
+
secondary_emotion = None
|
| 127 |
+
if dominant_sentiment == 'Positive':
|
| 128 |
+
if high_energy and high_pitch_var:
|
| 129 |
+
emotion = "joy"
|
| 130 |
+
secondary_emotion = "excitement"
|
| 131 |
+
elif 'प्यार' in text_lower or 'love' in text_lower:
|
| 132 |
+
emotion = "love"
|
| 133 |
+
else:
|
| 134 |
+
emotion = "happiness"
|
| 135 |
+
confidence = max_score
|
| 136 |
+
|
| 137 |
+
elif dominant_sentiment == 'Negative':
|
| 138 |
+
if is_crisis or 'डर' in text_lower or 'fear' in text_lower:
|
| 139 |
+
emotion = "fear"
|
| 140 |
+
elif 'गुस्सा' in text_lower or 'angry' in text_lower:
|
| 141 |
+
emotion = "anger"
|
| 142 |
+
elif 'दुख' in text_lower or 'sad' in text_lower or 'रो' in text_lower:
|
| 143 |
+
emotion = "sadness"
|
| 144 |
+
elif 'चिंता' in text_lower or 'worry' in text_lower:
|
| 145 |
+
emotion = "anxiety"
|
| 146 |
+
else:
|
| 147 |
+
emotion = "sadness"
|
| 148 |
+
confidence = max_score
|
| 149 |
+
|
| 150 |
+
else: # Neutral
|
| 151 |
+
if calm_pitch and low_energy:
|
| 152 |
+
emotion = "calm"
|
| 153 |
+
elif 'समझ नहीं' in text_lower or 'confus' in text_lower:
|
| 154 |
+
emotion = "confusion"
|
| 155 |
+
else:
|
| 156 |
+
emotion = "neutral"
|
| 157 |
+
confidence = max_score
|
| 158 |
+
|
| 159 |
+
return emotion, secondary_emotion, confidence
|
| 160 |
+
|
| 161 |
+
# ============================================
|
| 162 |
+
# 3. AUDIO PREPROCESSING FUNCTIONS
|
| 163 |
+
# ============================================
|
| 164 |
+
|
| 165 |
+
def advanced_preprocess_audio(audio_path, target_sr=16000):
|
| 166 |
+
"""Advanced audio preprocessing pipeline"""
|
| 167 |
try:
|
|
|
|
| 168 |
wav, sr = torchaudio.load(audio_path)
|
| 169 |
|
|
|
|
| 170 |
if wav.shape[0] > 1:
|
| 171 |
wav = torch.mean(wav, dim=0, keepdim=True)
|
| 172 |
print(f"📊 Converted stereo to mono")
|
| 173 |
|
|
|
|
| 174 |
if sr != target_sr:
|
| 175 |
resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
|
| 176 |
wav = resampler(wav)
|
| 177 |
print(f"🔄 Resampled from {sr}Hz to {target_sr}Hz")
|
| 178 |
|
|
|
|
| 179 |
audio_np = wav.squeeze().numpy()
|
|
|
|
|
|
|
| 180 |
audio_np = audio_np - np.mean(audio_np)
|
| 181 |
|
| 182 |
+
audio_trimmed, _ = librosa.effects.trim(
|
|
|
|
| 183 |
audio_np,
|
| 184 |
+
top_db=25,
|
| 185 |
frame_length=2048,
|
| 186 |
hop_length=512
|
| 187 |
)
|
| 188 |
print(f"✂️ Trimmed {len(audio_np) - len(audio_trimmed)} silent samples")
|
| 189 |
|
|
|
|
| 190 |
audio_normalized = librosa.util.normalize(audio_trimmed)
|
| 191 |
|
|
|
|
| 192 |
pre_emphasis = 0.97
|
| 193 |
audio_emphasized = np.append(
|
| 194 |
audio_normalized[0],
|
| 195 |
audio_normalized[1:] - pre_emphasis * audio_normalized[:-1]
|
| 196 |
)
|
| 197 |
|
|
|
|
| 198 |
audio_denoised = spectral_noise_gate(audio_emphasized, target_sr)
|
|
|
|
|
|
|
| 199 |
audio_compressed = dynamic_range_compression(audio_denoised)
|
|
|
|
|
|
|
| 200 |
audio_final = librosa.util.normalize(audio_compressed)
|
| 201 |
|
|
|
|
| 202 |
audio_tensor = torch.from_numpy(audio_final).float().unsqueeze(0)
|
| 203 |
|
| 204 |
print(f"✅ Preprocessing complete: {len(audio_final)/target_sr:.2f}s of audio")
|
|
|
|
| 210 |
return basic_preprocess_audio(audio_path, target_sr)
|
| 211 |
|
| 212 |
def basic_preprocess_audio(audio_path, target_sr=16000):
|
| 213 |
+
"""Fallback basic preprocessing"""
|
|
|
|
|
|
|
| 214 |
try:
|
| 215 |
wav, sr = torchaudio.load(audio_path)
|
| 216 |
|
|
|
|
| 229 |
raise
|
| 230 |
|
| 231 |
def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0.6):
|
| 232 |
+
"""Advanced spectral noise gating using STFT"""
|
|
|
|
|
|
|
| 233 |
try:
|
|
|
|
| 234 |
stft = librosa.stft(audio, n_fft=2048, hop_length=512)
|
| 235 |
magnitude = np.abs(stft)
|
| 236 |
phase = np.angle(stft)
|
| 237 |
|
|
|
|
| 238 |
noise_profile = np.percentile(magnitude, noise_floor_percentile, axis=1, keepdims=True)
|
|
|
|
|
|
|
| 239 |
snr = magnitude / (noise_profile + 1e-10)
|
| 240 |
gate = np.minimum(1.0, np.maximum(0.0, (snr - 1.0) / 2.0))
|
|
|
|
|
|
|
| 241 |
magnitude_gated = magnitude * (gate + (1 - gate) * (1 - reduction_factor))
|
| 242 |
|
|
|
|
| 243 |
stft_clean = magnitude_gated * np.exp(1j * phase)
|
| 244 |
audio_clean = librosa.istft(stft_clean, hop_length=512)
|
| 245 |
|
|
|
|
| 249 |
return audio
|
| 250 |
|
| 251 |
def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
|
| 252 |
+
"""Simple dynamic range compression"""
|
|
|
|
|
|
|
| 253 |
try:
|
|
|
|
| 254 |
abs_audio = np.abs(audio)
|
| 255 |
above_threshold = abs_audio > threshold
|
| 256 |
|
|
|
|
| 257 |
compressed = audio.copy()
|
| 258 |
compressed[above_threshold] = np.sign(audio[above_threshold]) * (
|
| 259 |
threshold + (abs_audio[above_threshold] - threshold) / ratio
|
|
|
|
| 265 |
return audio
|
| 266 |
|
| 267 |
# ============================================
|
| 268 |
+
# 4. PROSODIC FEATURE EXTRACTION
|
| 269 |
# ============================================
|
| 270 |
|
| 271 |
def extract_prosodic_features(audio, sr):
|
| 272 |
+
"""Extract prosodic features"""
|
|
|
|
|
|
|
| 273 |
try:
|
| 274 |
features = {}
|
| 275 |
|
|
|
|
| 276 |
pitches, magnitudes = librosa.piptrack(
|
| 277 |
y=audio,
|
| 278 |
sr=sr,
|
| 279 |
+
fmin=80,
|
| 280 |
fmax=400
|
| 281 |
)
|
| 282 |
pitch_values = []
|
|
|
|
| 293 |
else:
|
| 294 |
features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0
|
| 295 |
|
|
|
|
| 296 |
rms = librosa.feature.rms(y=audio)[0]
|
| 297 |
features['energy_mean'] = np.mean(rms)
|
| 298 |
features['energy_std'] = np.std(rms)
|
| 299 |
|
|
|
|
| 300 |
zcr = librosa.feature.zero_crossing_rate(audio)[0]
|
| 301 |
features['speech_rate'] = np.mean(zcr)
|
| 302 |
|
|
|
|
| 303 |
spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
|
| 304 |
features['spectral_centroid_mean'] = np.mean(spectral_centroid)
|
| 305 |
|
|
|
|
| 306 |
spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
|
| 307 |
features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
|
| 308 |
|
|
|
|
| 317 |
}
|
| 318 |
|
| 319 |
# ============================================
|
| 320 |
+
# 5. TEXT ANALYSIS HELPERS
|
| 321 |
# ============================================
|
| 322 |
|
| 323 |
def validate_hindi_text(text):
|
| 324 |
+
"""Validate if text contains Hindi/Devanagari characters"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
hindi_pattern = re.compile(r'[\u0900-\u097F]')
|
|
|
|
|
|
|
| 326 |
hindi_chars = len(hindi_pattern.findall(text))
|
| 327 |
total_chars = len(re.findall(r'\S', text))
|
| 328 |
|
|
|
|
| 331 |
|
| 332 |
hindi_ratio = hindi_chars / total_chars
|
| 333 |
|
|
|
|
| 334 |
if hindi_ratio < 0.15:
|
| 335 |
return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}% Hindi)", hindi_ratio
|
| 336 |
|
| 337 |
return True, "Valid Hindi/Hinglish", hindi_ratio
|
| 338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
def detect_negation(text):
|
| 340 |
+
"""Detect negation words"""
|
|
|
|
|
|
|
| 341 |
negation_words = [
|
| 342 |
'नहीं', 'न', 'मत', 'नही', 'ना',
|
| 343 |
'not', 'no', 'never', 'neither', 'nor',
|
|
|
|
| 351 |
return False
|
| 352 |
|
| 353 |
def detect_crisis_keywords(text):
|
| 354 |
+
"""Detect crisis/emergency keywords"""
|
|
|
|
|
|
|
| 355 |
crisis_keywords = [
|
| 356 |
+
'बचाओ', 'मदद', 'help', 'save',
|
| 357 |
+
'मार', 'पीट', 'हिंसा', 'beat', 'hit', 'violence',
|
| 358 |
+
'डर', 'खतरा', 'fear', 'danger',
|
| 359 |
+
'मर', 'मौत', 'death', 'die',
|
| 360 |
+
'छोड़', 'leave me', 'stop'
|
| 361 |
]
|
| 362 |
|
| 363 |
text_lower = text.lower()
|
|
|
|
| 367 |
return False
|
| 368 |
|
| 369 |
def detect_mixed_emotions(text, prosodic_features):
|
| 370 |
+
"""Detect mixed emotions"""
|
|
|
|
|
|
|
|
|
|
| 371 |
text_lower = text.lower()
|
| 372 |
|
|
|
|
| 373 |
if detect_crisis_keywords(text):
|
|
|
|
| 374 |
return False
|
| 375 |
|
| 376 |
mixed_indicators = [
|
|
|
|
| 381 |
'शायद', 'maybe', 'perhaps'
|
| 382 |
]
|
| 383 |
|
| 384 |
+
positive_words = ['खुश', 'प्यार', 'अच्छा', 'बढ़िया', 'मज़ा', 'happy', 'love', 'good', 'nice']
|
| 385 |
+
negative_words = ['दुख', 'रो', 'गुस्सा', 'बुरा', 'परेशान', 'sad', 'cry', 'angry', 'bad', 'upset']
|
| 386 |
|
| 387 |
has_mixed_indicators = any(ind in text_lower for ind in mixed_indicators)
|
| 388 |
has_positive = any(word in text_lower for word in positive_words)
|
| 389 |
has_negative = any(word in text_lower for word in negative_words)
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
text_mixed = has_mixed_indicators and (has_positive and has_negative)
|
|
|
|
| 392 |
|
| 393 |
+
return text_mixed
|
| 394 |
+
|
| 395 |
+
# ============================================
|
| 396 |
+
# 6. ENHANCED SENTIMENT ANALYSIS
|
| 397 |
+
# ============================================
|
| 398 |
|
| 399 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
| 400 |
+
"""Enhanced sentiment analysis"""
|
|
|
|
|
|
|
|
|
|
| 401 |
sentiment_scores = {}
|
| 402 |
|
| 403 |
if not raw_results or not isinstance(raw_results, list) or len(raw_results) == 0:
|
|
|
|
| 404 |
return {'Negative': 0.33, 'Neutral': 0.34, 'Positive': 0.33}, 0.34, False
|
| 405 |
|
| 406 |
label_mapping = {
|
|
|
|
| 422 |
if sentiment not in sentiment_scores:
|
| 423 |
sentiment_scores[sentiment] = 0.0
|
| 424 |
|
|
|
|
|
|
|
|
|
|
| 425 |
is_crisis = detect_crisis_keywords(text)
|
| 426 |
if is_crisis:
|
|
|
|
|
|
|
| 427 |
sentiment_scores['Negative'] = min(0.95, sentiment_scores['Negative'] * 1.8)
|
| 428 |
sentiment_scores['Neutral'] = max(0.02, sentiment_scores['Neutral'] * 0.2)
|
| 429 |
sentiment_scores['Positive'] = max(0.01, sentiment_scores['Positive'] * 0.1)
|
| 430 |
+
is_mixed = False
|
| 431 |
else:
|
|
|
|
| 432 |
has_negation = detect_negation(text)
|
| 433 |
if has_negation:
|
|
|
|
| 434 |
temp = sentiment_scores['Positive']
|
| 435 |
sentiment_scores['Positive'] = sentiment_scores['Negative']
|
| 436 |
sentiment_scores['Negative'] = temp
|
| 437 |
|
|
|
|
| 438 |
is_mixed = detect_mixed_emotions(text, prosodic_features)
|
| 439 |
if is_mixed:
|
| 440 |
+
neutral_boost = 0.20
|
|
|
|
| 441 |
sentiment_scores['Neutral'] = min(0.65, sentiment_scores['Neutral'] + neutral_boost)
|
| 442 |
sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
|
| 443 |
sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
|
|
|
| 445 |
total = sum(sentiment_scores.values())
|
| 446 |
if total > 0:
|
| 447 |
sentiment_scores = {k: v/total for k, v in sentiment_scores.items()}
|
|
|
|
| 451 |
return sentiment_scores, final_confidence, is_mixed
|
| 452 |
|
| 453 |
# ============================================
|
| 454 |
+
# 7. MAIN PREDICTION FUNCTION
|
| 455 |
# ============================================
|
| 456 |
|
| 457 |
def predict(audio_filepath):
|
| 458 |
+
"""Main prediction function - Returns JSON-parseable dict"""
|
|
|
|
|
|
|
| 459 |
try:
|
| 460 |
print(f"\n{'='*60}")
|
| 461 |
print(f"🎧 Processing audio file...")
|
| 462 |
|
| 463 |
if audio_filepath is None:
|
| 464 |
+
return {
|
| 465 |
+
"status": "error",
|
| 466 |
+
"error_type": "no_audio",
|
| 467 |
+
"message": "No audio file uploaded"
|
| 468 |
+
}
|
| 469 |
|
| 470 |
+
# Preprocessing
|
|
|
|
|
|
|
| 471 |
print("🔧 Applying advanced audio preprocessing...")
|
| 472 |
try:
|
| 473 |
audio_tensor, sr, audio_np = advanced_preprocess_audio(audio_filepath)
|
| 474 |
prosodic_features = extract_prosodic_features(audio_np, sr)
|
| 475 |
except Exception as e:
|
| 476 |
+
return {
|
| 477 |
+
"status": "error",
|
| 478 |
+
"error_type": "preprocessing_error",
|
| 479 |
+
"message": str(e)
|
| 480 |
+
}
|
| 481 |
|
| 482 |
+
# ASR Transcription
|
| 483 |
+
print("🔄 Transcribing with Indic Conformer...")
|
|
|
|
|
|
|
| 484 |
try:
|
|
|
|
| 485 |
transcription_rnnt = ASR_MODEL(audio_tensor, "hi", "rnnt")
|
|
|
|
| 486 |
|
|
|
|
| 487 |
if not transcription_rnnt or len(transcription_rnnt.strip()) < 2:
|
|
|
|
| 488 |
transcription_ctc = ASR_MODEL(audio_tensor, "hi", "ctc")
|
|
|
|
| 489 |
transcription = transcription_ctc
|
| 490 |
else:
|
| 491 |
transcription = transcription_rnnt
|
|
|
|
| 493 |
transcription = transcription.strip()
|
| 494 |
|
| 495 |
except Exception as asr_error:
|
| 496 |
+
return {
|
| 497 |
+
"status": "error",
|
| 498 |
+
"error_type": "asr_error",
|
| 499 |
+
"message": str(asr_error)
|
| 500 |
+
}
|
| 501 |
|
| 502 |
+
# Validation
|
|
|
|
|
|
|
| 503 |
if not transcription or len(transcription) < 2:
|
| 504 |
+
return {
|
| 505 |
+
"status": "error",
|
| 506 |
+
"error_type": "no_speech",
|
| 507 |
+
"message": "No speech detected in the audio",
|
| 508 |
+
"transcription": transcription or ""
|
| 509 |
+
}
|
| 510 |
|
| 511 |
is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
|
|
|
|
| 512 |
|
| 513 |
if not is_valid:
|
| 514 |
return {
|
| 515 |
+
"status": "error",
|
| 516 |
+
"error_type": "language_error",
|
| 517 |
+
"message": validation_msg,
|
| 518 |
+
"transcription": transcription,
|
| 519 |
+
"hindi_content_percentage": round(hindi_ratio * 100, 2)
|
| 520 |
}
|
| 521 |
|
| 522 |
+
# Sentiment Analysis
|
|
|
|
|
|
|
| 523 |
print("💭 Analyzing sentiment...")
|
| 524 |
try:
|
| 525 |
raw_sentiment = SENTIMENT_PIPELINE(transcription)
|
|
|
|
| 530 |
raw_sentiment
|
| 531 |
)
|
| 532 |
|
| 533 |
+
# Map to emotion
|
| 534 |
+
emotion, secondary_emotion, emotion_confidence = map_sentiment_to_emotion(
|
| 535 |
+
sentiment_scores,
|
| 536 |
+
transcription,
|
| 537 |
+
prosodic_features,
|
| 538 |
+
is_mixed
|
| 539 |
+
)
|
| 540 |
|
| 541 |
+
# Build structured output
|
| 542 |
+
result = {
|
| 543 |
+
"status": "success",
|
| 544 |
+
"transcription": transcription,
|
| 545 |
+
"emotion": {
|
| 546 |
+
"primary": emotion,
|
| 547 |
+
"secondary": secondary_emotion,
|
| 548 |
+
"confidence": round(emotion_confidence, 4)
|
| 549 |
+
},
|
| 550 |
+
"sentiment_scores": {
|
| 551 |
+
"positive": round(sentiment_scores['Positive'], 4),
|
| 552 |
+
"neutral": round(sentiment_scores['Neutral'], 4),
|
| 553 |
+
"negative": round(sentiment_scores['Negative'], 4)
|
| 554 |
+
},
|
| 555 |
+
"analysis": {
|
| 556 |
+
"mixed_emotions": is_mixed,
|
| 557 |
+
"hindi_content_percentage": round(hindi_ratio * 100, 2),
|
| 558 |
+
"is_crisis": detect_crisis_keywords(transcription),
|
| 559 |
+
"has_negation": detect_negation(transcription)
|
| 560 |
+
},
|
| 561 |
+
"prosodic_features": {
|
| 562 |
+
"pitch_mean": round(prosodic_features['pitch_mean'], 2),
|
| 563 |
+
"pitch_std": round(prosodic_features['pitch_std'], 2),
|
| 564 |
+
"energy_mean": round(prosodic_features['energy_mean'], 4),
|
| 565 |
+
"energy_std": round(prosodic_features['energy_std'], 4),
|
| 566 |
+
"speech_rate": round(prosodic_features['speech_rate'], 4)
|
| 567 |
+
}
|
| 568 |
+
}
|
| 569 |
|
| 570 |
+
print(f"✅ Detected Emotion: {emotion}")
|
| 571 |
+
print(f"📝 Transcription: {transcription}")
|
|
|
|
|
|
|
| 572 |
print(f"{'='*60}\n")
|
| 573 |
|
| 574 |
+
return result
|
| 575 |
|
| 576 |
except Exception as sentiment_error:
|
| 577 |
+
return {
|
| 578 |
+
"status": "error",
|
| 579 |
+
"error_type": "sentiment_error",
|
| 580 |
+
"message": str(sentiment_error),
|
| 581 |
+
"transcription": transcription
|
| 582 |
+
}
|
| 583 |
|
| 584 |
except Exception as e:
|
|
|
|
| 585 |
import traceback
|
| 586 |
traceback.print_exc()
|
| 587 |
+
return {
|
| 588 |
+
"status": "error",
|
| 589 |
+
"error_type": "system_error",
|
| 590 |
+
"message": str(e)
|
| 591 |
+
}
|
| 592 |
|
| 593 |
# ============================================
|
| 594 |
+
# 8. GRADIO INTERFACE
|
| 595 |
# ============================================
|
| 596 |
|
| 597 |
demo = gr.Interface(
|
|
|
|
| 601 |
label="🎤 Record or Upload Hindi Audio",
|
| 602 |
sources=["upload", "microphone"]
|
| 603 |
),
|
| 604 |
+
outputs=gr.JSON(label="📊 Emotion Analysis Results (API-Ready JSON)"),
|
| 605 |
+
title="🎭 Hindi Speech Emotion Analysis API",
|
|
|
|
|
|
|
|
|
|
| 606 |
description="""
|
| 607 |
+
## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion Detection
|
| 608 |
|
| 609 |
+
### ✨ Features:
|
| 610 |
+
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 611 |
+
- **🧠 Emotion Detection** - Joy, Sadness, Anger, Fear, Love, Calm, etc.
|
| 612 |
+
- **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
|
|
|
|
| 613 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 614 |
+
- **📝 JSON Output** - Easy to parse for API integration
|
| 615 |
+
|
| 616 |
+
### 📊 JSON Output Format:
|
| 617 |
+
```json
|
| 618 |
+
{
|
| 619 |
+
"status": "success",
|
| 620 |
+
"transcription": "मैं बहुत खुश हूं",
|
| 621 |
+
"emotion": {
|
| 622 |
+
"primary": "joy",
|
| 623 |
+
"secondary": null,
|
| 624 |
+
"confidence": 0.8745
|
| 625 |
+
},
|
| 626 |
+
"sentiment_scores": {
|
| 627 |
+
"positive": 0.8745,
|
| 628 |
+
"neutral": 0.0923,
|
| 629 |
+
"negative": 0.0332
|
| 630 |
+
},
|
| 631 |
+
"analysis": {
|
| 632 |
+
"mixed_emotions": false,
|
| 633 |
+
"hindi_content_percentage": 100.0,
|
| 634 |
+
"is_crisis": false,
|
| 635 |
+
"has_negation": false
|
| 636 |
+
},
|
| 637 |
+
"prosodic_features": {
|
| 638 |
+
"pitch_mean": 180.45,
|
| 639 |
+
"pitch_std": 35.12,
|
| 640 |
+
"energy_mean": 0.0876,
|
| 641 |
+
"energy_std": 0.0234,
|
| 642 |
+
"speech_rate": 0.1234
|
| 643 |
+
}
|
| 644 |
+
}
|
| 645 |
+
```
|
| 646 |
+
|
| 647 |
+
### 🎯 Supported Emotions:
|
| 648 |
+
- **Positive**: joy, happiness, love, excitement, calm
|
| 649 |
+
- **Negative**: sadness, anger, fear, anxiety, disgust
|
| 650 |
+
- **Neutral**: neutral, confusion, mixed
|
| 651 |
|
| 652 |
### 🧪 Test Examples:
|
| 653 |
+
- **😊 Joy**: "मैं बहुत खुश हूं आज"
|
| 654 |
+
- **😢 Sadness**: "मुझे बहुत दुख हो रहा है"
|
| 655 |
+
- **😠 Anger**: "मुझे बहुत गुस्सा आ रहा है"
|
| 656 |
+
- **😨 Fear**: "मुझे डर लग रहा है"
|
| 657 |
+
- **😐 Calm**: "सब ठीक है, मैं शांत हूं"
|
| 658 |
+
- **❤️ Love**: "मुझे तुमसे बहुत प्यार है"
|
| 659 |
+
|
| 660 |
+
### 💡 API Usage:
|
| 661 |
+
1. Send audio file to the endpoint
|
| 662 |
+
2. Receive structured JSON response
|
| 663 |
+
3. Parse `emotion.primary` for the main emotion
|
| 664 |
+
4. Use `transcription` for text analysis
|
| 665 |
+
5. Check `analysis.mixed_emotions` for complex states
|
| 666 |
+
|
| 667 |
+
### 🔗 Integration Examples:
|
| 668 |
+
|
| 669 |
+
**Python API Client:**
|
| 670 |
+
```python
|
| 671 |
+
import requests
|
| 672 |
+
|
| 673 |
+
# Send audio file
|
| 674 |
+
with open("audio.wav", "rb") as f:
|
| 675 |
+
response = requests.post(
|
| 676 |
+
"YOUR_API_URL/predict",
|
| 677 |
+
files={"audio": f}
|
| 678 |
+
)
|
| 679 |
+
|
| 680 |
+
result = response.json()
|
| 681 |
+
|
| 682 |
+
if result["status"] == "success":
|
| 683 |
+
print(f"Emotion: {result['emotion']['primary']}")
|
| 684 |
+
print(f"Text: {result['transcription']}")
|
| 685 |
+
print(f"Confidence: {result['emotion']['confidence']}")
|
| 686 |
+
```
|
| 687 |
+
|
| 688 |
+
**Database Storage:**
|
| 689 |
+
```python
|
| 690 |
+
# Store in MongoDB
|
| 691 |
+
db.emotions.insert_one({
|
| 692 |
+
"user_id": user_id,
|
| 693 |
+
"timestamp": datetime.now(),
|
| 694 |
+
"emotion": result["emotion"]["primary"],
|
| 695 |
+
"transcription": result["transcription"],
|
| 696 |
+
"confidence": result["emotion"]["confidence"],
|
| 697 |
+
"sentiment_positive": result["sentiment_scores"]["positive"],
|
| 698 |
+
"is_crisis": result["analysis"]["is_crisis"]
|
| 699 |
+
})
|
| 700 |
+
```
|
| 701 |
+
|
| 702 |
+
**React/JavaScript:**
|
| 703 |
+
```javascript
|
| 704 |
+
const formData = new FormData();
|
| 705 |
+
formData.append('audio', audioBlob);
|
| 706 |
+
|
| 707 |
+
fetch('YOUR_API_URL/predict', {
|
| 708 |
+
method: 'POST',
|
| 709 |
+
body: formData
|
| 710 |
+
})
|
| 711 |
+
.then(res => res.json())
|
| 712 |
+
.then(data => {
|
| 713 |
+
if (data.status === 'success') {
|
| 714 |
+
console.log('Emotion:', data.emotion.primary);
|
| 715 |
+
console.log('Text:', data.transcription);
|
| 716 |
+
}
|
| 717 |
+
});
|
| 718 |
+
```
|
| 719 |
""",
|
|
|
|
| 720 |
theme=gr.themes.Soft(),
|
| 721 |
flagging_mode="never",
|
| 722 |
+
examples=[
|
| 723 |
+
["examples/happy.wav"] if os.path.exists("examples/happy.wav") else None,
|
| 724 |
+
] if os.path.exists("examples") else None
|
| 725 |
)
|
| 726 |
|
| 727 |
# ============================================
|
| 728 |
+
# 9. LAUNCH APP
|
| 729 |
# ============================================
|
| 730 |
|
| 731 |
# ============================================
# 9. LAUNCH APP
# ============================================

if __name__ == "__main__":
    print("🌐 Starting server...")
    # NOTE: demo.launch() blocks until the server is shut down, so any
    # message placed after it would only appear on exit. Announce
    # readiness *before* handing control to the Gradio event loop.
    print("🎉 Hindi Emotion Analysis API is ready!")
    demo.launch()
|