JustNikunj commited on
Commit
28bbae1
·
verified ·
1 Parent(s): f3f209d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +484 -190
app.py CHANGED
@@ -1,263 +1,557 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import pipeline
4
  import librosa
5
  import numpy as np
6
- import asyncio
7
  import re
 
 
 
8
 
9
- print("🚀 Starting Hindi Speech Sentiment Analysis App...")
10
 
11
- # Load sentiment analysis model
12
- print("📚 Loading sentiment analysis model...")
 
 
 
 
13
  try:
 
 
 
14
  sentiment_pipeline = pipeline(
15
  "text-classification",
16
- model="LondonStory/txlm-roberta-hindi-sentiment",
 
17
  top_k=None
18
  )
19
- print("✅ Sentiment model loaded successfully")
20
  except Exception as e:
21
  print(f"❌ Error loading sentiment model: {e}")
 
22
 
23
- # Use a working Hindi ASR model - go back to original with proper handling
24
- print("🎤 Loading Hindi ASR model...")
25
  try:
26
- # Try the original Hindi model but with basic Wav2Vec2 components
27
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
28
-
29
- print("Loading ai4bharat Hindi model with basic processor...")
30
- processor = Wav2Vec2Processor.from_pretrained("ai4bharat/indicwav2vec-hindi")
31
- model = Wav2Vec2ForCTC.from_pretrained("ai4bharat/indicwav2vec-hindi")
32
-
33
- # Create custom ASR function instead of pipeline
34
- def custom_asr(audio_file):
35
- import librosa
36
- import torch
37
-
38
- # Load audio
39
- audio_array, sample_rate = librosa.load(audio_file, sr=16000)
40
-
41
- # Process with the model
42
- inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True)
43
-
44
- with torch.no_grad():
45
- logits = model(**inputs).logits
46
-
47
- # Get predictions
48
- predicted_ids = torch.argmax(logits, dim=-1)
49
- transcription = processor.batch_decode(predicted_ids)[0]
50
-
51
- return {"text": transcription}
52
-
53
- asr_pipeline = custom_asr
54
- print("✅ ai4bharat Hindi ASR model loaded successfully")
55
-
56
  except Exception as e:
57
- print(f" Error loading ai4bharat model: {e}")
58
- print("Trying Whisper as fallback...")
59
  try:
60
- # Fallback to Whisper with proper Hindi settings
61
  asr_pipeline = pipeline(
62
  "automatic-speech-recognition",
63
- model="openai/whisper-tiny",
64
  device="cpu"
65
  )
66
- print("✅ Whisper fallback ASR model loaded successfully")
67
  except Exception as e2:
68
  print(f"❌ Error loading any ASR model: {e2}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- async def predict(audio_filepath):
71
  """
72
- Process audio and return sentiment analysis using Whisper + LondonStory
73
  """
74
  try:
75
- print(f"\n{'='*50}")
76
- print(f"🎧 Processing new audio file...")
 
 
 
 
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  if audio_filepath is None:
79
  print("❌ No audio file provided")
80
- return {"⚠️ No Audio": 1.0}
 
 
 
81
 
82
- print(f"📂 File path: {audio_filepath}")
83
 
84
- # Transcribe audio using ASR model
85
- print("🔄 Transcribing audio...")
 
86
  try:
87
- # Handle both custom function and pipeline
88
- if callable(asr_pipeline) and not hasattr(asr_pipeline, 'model'):
89
- # Custom function
90
- result = asr_pipeline(audio_filepath)
91
- else:
92
- # Pipeline - try with Hindi language setting if it's Whisper
93
- try:
94
- result = asr_pipeline(
95
- audio_filepath,
96
- generate_kwargs={"language": "hindi", "task": "transcribe"}
97
- )
98
- except:
99
- # Fallback to basic call
100
- result = asr_pipeline(audio_filepath)
101
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  transcription = result["text"].strip()
103
- print(f"📝 ASR transcription: '{transcription}'")
 
 
 
 
104
 
105
- # Handle empty transcription
106
- if not transcription:
107
- print("⚠️ Empty transcription from Whisper")
108
- return {"No Speech": 1.0}
109
-
110
  except Exception as asr_error:
111
- print(f"❌ Whisper ASR Error: {asr_error}")
112
- return {"ASR Error": 1.0}
 
 
 
113
 
114
- # Perform enhanced sentiment analysis
115
- print("💭 Analyzing sentiment with enhanced logic...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  try:
117
- # Get raw sentiment results
118
- sentiment_results = sentiment_pipeline(transcription)
119
- print(f"📊 Raw sentiment results: {sentiment_results}")
120
-
121
- # Enhanced sentiment analysis for complex emotional text
122
- def enhance_sentiment_analysis(text, raw_results):
123
- """
124
- Enhance sentiment analysis for mixed emotions and complex text
125
- """
126
- # Check for mixed emotion indicators in Hindi
127
- mixed_indicators = [
128
- 'कभी', 'कभीकभी', 'sometimes', 'कभी कभी', # sometimes
129
- 'लेकिन', 'पर', 'but', # but
130
- 'समझ नहीं आ रहा', 'confuse', 'confusion', # confused
131
- 'या', 'or', # or (indicates uncertainty)
132
- 'क्या', 'does', 'whether' # question words
133
- ]
134
-
135
- # Check for contrasting emotions in same text
136
- positive_words = ['खुश', 'प्यार', 'happy', 'love', 'अच्छा']
137
- negative_words = ['रो', 'दुख', 'cry', 'sad', 'परेशान']
138
-
139
- text_lower = text.lower()
140
- has_mixed = any(indicator in text_lower for indicator in mixed_indicators)
141
- has_positive = any(word in text_lower for word in positive_words)
142
- has_negative = any(word in text_lower for word in negative_words)
143
-
144
- # If text has mixed indicators or contrasting emotions
145
- if has_mixed or (has_positive and has_negative):
146
- print("🔄 Detected mixed emotions - adjusting sentiment scores...")
147
-
148
- # Get original scores
149
- original_scores = {result['label']: result['score'] for result in raw_results[0]}
150
-
151
- # Boost neutral score for mixed emotions
152
- neutral_boost = 0.3
153
- negative_score = original_scores.get('LABEL_0', 0)
154
- positive_score = original_scores.get('LABEL_2', 0)
155
- neutral_score = original_scores.get('LABEL_1', 0)
156
-
157
- # Redistribute scores to favor neutral
158
- adjusted_scores = {
159
- 'LABEL_0': max(0.1, negative_score - neutral_boost/2),
160
- 'LABEL_1': min(0.8, neutral_score + neutral_boost),
161
- 'LABEL_2': max(0.1, positive_score - neutral_boost/2)
162
- }
163
-
164
- # Normalize to sum to 1
165
- total = sum(adjusted_scores.values())
166
- adjusted_scores = {k: v/total for k, v in adjusted_scores.items()}
167
-
168
- print(f"🔧 Adjusted for mixed emotions: {adjusted_scores}")
169
- return [{'label': k, 'score': v} for k, v in adjusted_scores.items()]
170
-
171
- return raw_results[0]
172
 
173
- # Apply enhanced sentiment analysis
174
- enhanced_results = enhance_sentiment_analysis(transcription, sentiment_results)
 
 
 
 
175
 
176
- # Format results for Gradio
 
 
177
  result_dict = {}
178
- label_mapping = {
179
- 'LABEL_0': 'Negative',
180
- 'LABEL_1': 'Neutral',
181
- 'LABEL_2': 'Positive'
182
- }
183
 
184
- for result in enhanced_results:
185
- raw_label = result['label']
186
- score = result['score']
187
- sentiment_name = label_mapping.get(raw_label, raw_label)
188
- result_dict[sentiment_name] = float(score)
189
 
190
- # Add transcription to the visible results
191
- result_dict['📝 Transcription'] = transcription
 
 
 
192
 
193
- # Log success details
194
- print(f" SUCCESS! Processing completed")
195
- print(f"📝 Final transcription: '{transcription}'")
196
- for label, score in result_dict.items():
197
- if label != '📝 Transcription': # Don't print transcription twice
198
- print(f"📊 {label}: {score:.3f}")
199
- print(f"{'='*50}\n")
 
200
 
201
  return result_dict
202
 
203
  except Exception as sentiment_error:
204
- print(f"❌ Sentiment Analysis Error: {sentiment_error}")
205
- return {"Sentiment Error": 1.0}
 
 
 
 
206
 
207
  except Exception as e:
208
- print(f"❌ General Error: {str(e)}")
209
- return {"Processing Error": 1.0}
 
 
 
 
 
 
 
 
 
210
 
211
- # Create Gradio interface with async support
212
  demo = gr.Interface(
213
- fn=predict,
214
  inputs=gr.Audio(
215
  type="filepath",
216
  label="🎤 Record or Upload Hindi Audio",
217
  sources=["upload", "microphone"]
218
  ),
219
  outputs=gr.Label(
220
- label="🎭 Sentiment Analysis Results",
221
- num_top_classes=6 # Increased to show transcription + 3 sentiments
222
  ),
223
- title="🎤 Hindi Speech Sentiment Analysis (Enhanced + Async)",
224
  description="""
225
- ## 🇮🇳 Analyze sentiment from Hindi speech with enhanced emotion detection
226
 
227
- ### 🔄 How it works:
228
- 1. **🎤 AI Speech Recognition** Converts Hindi speech to Devanagari text
229
- 2. **💭 Enhanced Sentiment AI** Analyzes emotions with mixed-emotion detection
230
- 3. **⚡ Async Processing** Faster response times
 
 
 
 
231
 
232
- ### 🧪 Test Phrases (speak clearly):
233
- - **😊 Happy**: "मैं बहुत खुश हूं" *(Main bahut khush hun)*
234
- - **😠 Sad**: "मुझे दुख है" *(Mujhe dukh hai)*
235
- - **😐 Neutral/Mixed**: "कभी खुश कभी उदास हूं" *(Sometimes happy, sometimes sad)*
236
- - **❤️ Love**: "मुझे यह पसंद है" *(Mujhe yeh pasand hai)*
237
- - **🤔 Confused**: "समझ नहीं आ रहा क्या करूं" *(Don't understand what to do)*
 
238
 
239
- ### 📋 Instructions:
240
- 1. Click the microphone to record or upload an audio file
241
- 2. Speak clearly in Hindi for 3-10 seconds
242
- 3. Click Submit and check results + logs below
 
 
243
 
244
- ### 🔍 Enhanced Features:
245
- - **Mixed emotion detection** for complex feelings
246
- - **Context-aware sentiment** analysis
247
- - **Async processing** for better performance
248
- - **Supports various Hindi dialects** and speaking styles
249
 
250
- ### 💡 Perfect for:
251
- - **Personal diary analysis** - Understanding your emotional patterns
252
- - **Relationship counseling** - Analyzing complex feelings
253
- - **Mental health tracking** - Monitoring emotional states over time
 
 
254
  """,
255
  examples=None,
256
  theme=gr.themes.Soft(),
257
- flagging_mode="never"
 
258
  )
259
 
260
- # Launch the app
 
 
 
261
  if __name__ == "__main__":
262
  print("🌐 Starting server...")
263
  demo.launch(
@@ -265,4 +559,4 @@ if __name__ == "__main__":
265
  server_port=7860,
266
  show_error=True
267
  )
268
- print("🎉 Whisper + Hindi Sentiment Analysis App is ready!")
 
1
import gradio as gr
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import librosa
import numpy as np
import re
from scipy import signal
import warnings
warnings.filterwarnings('ignore')  # silence library deprecation chatter in the app logs

print("🚀 Starting Enhanced Hindi Speech Sentiment Analysis App...")

# ============================================
# 1. LOAD MODELS
# ============================================
# Both pipelines are built once at import time so Gradio requests reuse them.

# Load XLM-RoBERTa Hindi Sentiment Model (Better accuracy)
print("📚 Loading XLM-RoBERTa sentiment analysis model...")
try:
    sentiment_model_name = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
    sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
    sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)
    sentiment_pipeline = pipeline(
        "text-classification",
        model=sentiment_model,
        tokenizer=sentiment_tokenizer,
        top_k=None  # return scores for ALL labels, not just the argmax
    )
    print("✅ XLM-RoBERTa sentiment model loaded successfully")
except Exception as e:
    print(f"❌ Error loading sentiment model: {e}")
    raise  # sentiment analysis is mandatory — abort startup

# Load IndicWhisper for Hindi ASR (Best for Indian languages)
print("🎤 Loading IndicWhisper Hindi ASR model...")
try:
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model="vasista22/whisper-hindi-medium",  # IndicWhisper variant
        device="cpu",
        chunk_length_s=30  # chunked decoding for clips longer than Whisper's 30 s window
    )
    print("✅ IndicWhisper Hindi ASR model loaded successfully")
except Exception as e:
    print(f"⚠️ Error loading IndicWhisper, trying fallback: {e}")
    try:
        # Fallback to OpenAI Whisper with Hindi optimization
        asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            device="cpu"
        )
        print("✅ Whisper-small fallback loaded successfully")
    except Exception as e2:
        print(f"❌ Error loading any ASR model: {e2}")
        raise  # no ASR model at all → the app cannot function
57
+
58
+ # ============================================
59
+ # 2. AUDIO PREPROCESSING FUNCTIONS
60
+ # ============================================
61
+
62
def preprocess_audio(audio_path, target_sr=16000):
    """
    Load and clean an audio file ahead of ASR.

    Pipeline: trim leading/trailing silence → normalize amplitude →
    pre-emphasis filter → spectral noise gating. If any step fails,
    fall back to a plain librosa load of the original file.

    Args:
        audio_path: Path to the audio file on disk.
        target_sr: Sample rate to resample to (Whisper expects 16 kHz).

    Returns:
        (samples, sample_rate) — 1-D float waveform and its rate.
    """
    try:
        print("🔧 Preprocessing audio...")

        raw, rate = librosa.load(audio_path, sr=target_sr, mono=True)

        # Drop silence at both ends so the ASR model sees only speech.
        trimmed, _ = librosa.effects.trim(raw, top_db=20, frame_length=2048, hop_length=512)

        # Bring the waveform to a consistent peak amplitude.
        leveled = librosa.util.normalize(trimmed)

        # Pre-emphasis boosts high frequencies, which carry consonant detail.
        coeff = 0.97
        emphasized = np.append(leveled[0], leveled[1:] - coeff * leveled[:-1])

        # Spectral-gating noise reduction (see reduce_noise below).
        cleaned = reduce_noise(emphasized, rate)

        print(f"✅ Audio preprocessed: {len(raw)//rate}s → {len(cleaned)//rate}s (after trim)")

        return cleaned, rate

    except Exception as e:
        # Best effort: preprocessing is an enhancement, never a hard failure.
        print(f"⚠️ Preprocessing warning: {e}, using original audio")
        fallback, rate = librosa.load(audio_path, sr=target_sr)
        return fallback, rate
94
 
95
def reduce_noise(audio, sr, noise_reduce_factor=0.5):
    """
    Simple spectral-gating noise reduction.

    Estimates a per-frequency noise floor from the quietest frames
    (10th percentile of STFT magnitudes) and subtracts a scaled copy of
    it from every frame, then resynthesizes with the original phase.

    Args:
        audio: 1-D float waveform.
        sr: Sample rate — unused here; kept for interface symmetry.
        noise_reduce_factor: Fraction of the noise floor to subtract (0–1).

    Returns:
        The denoised waveform, or the input unchanged if the transform fails.
    """
    try:
        # Compute STFT and split it into magnitude and phase.
        stft = librosa.stft(audio)
        magnitude = np.abs(stft)
        phase = np.angle(stft)

        # Noise floor: the quietest 10% of frames, per frequency bin.
        noise_profile = np.percentile(magnitude, 10, axis=1, keepdims=True)

        # Spectral subtraction, clipped at zero so magnitudes stay valid.
        magnitude_cleaned = np.maximum(magnitude - noise_reduce_factor * noise_profile, 0)

        # Resynthesize the waveform using the original phase.
        stft_cleaned = magnitude_cleaned * np.exp(1j * phase)
        audio_cleaned = librosa.istft(stft_cleaned)

        return audio_cleaned
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt /
        # SystemExit. Denoising is optional, so degrade gracefully instead.
        return audio
118
+
119
+ # ============================================
120
+ # 3. AUDIO FEATURE EXTRACTION (PROSODY)
121
+ # ============================================
122
+
123
def extract_prosodic_features(audio, sr):
    """
    Extract prosodic (tone-of-voice) features used to refine sentiment.

    Features: pitch statistics from piptrack, frame-wise RMS energy,
    zero-crossing rate as a speech-rate proxy, and spectral centroid.
    On any failure a zeroed feature dict is returned so downstream
    logic never crashes.

    Returns:
        dict with keys pitch_mean/std/range, energy_mean/std,
        speech_rate, spectral_centroid_mean.
    """
    try:
        features = {}

        # Pitch: keep the strongest pitch candidate of each voiced frame.
        pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)
        pitch_values = []
        for frame in range(pitches.shape[1]):
            strongest_bin = magnitudes[:, frame].argmax()
            f0 = pitches[strongest_bin, frame]
            if f0 > 0:
                pitch_values.append(f0)

        if pitch_values:
            features['pitch_mean'] = np.mean(pitch_values)
            features['pitch_std'] = np.std(pitch_values)
            features['pitch_range'] = np.max(pitch_values) - np.min(pitch_values)
        else:
            # Silent / unvoiced clip: no pitch information available.
            features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0

        # Loudness statistics from frame-wise RMS energy.
        rms = librosa.feature.rms(y=audio)[0]
        features['energy_mean'] = np.mean(rms)
        features['energy_std'] = np.std(rms)

        # Zero-crossing rate roughly tracks articulation speed.
        zcr = librosa.feature.zero_crossing_rate(audio)[0]
        features['speech_rate'] = np.mean(zcr)

        # Spectral centroid: "brightness" of the voice.
        spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
        features['spectral_centroid_mean'] = np.mean(spectral_centroid)

        print(f"🎵 Prosodic features: Pitch STD={features['pitch_std']:.1f}, Energy={features['energy_mean']:.3f}")

        return features

    except Exception as e:
        print(f"⚠️ Feature extraction error: {e}")
        return {
            'pitch_mean': 0, 'pitch_std': 0, 'pitch_range': 0,
            'energy_mean': 0, 'energy_std': 0, 'speech_rate': 0,
            'spectral_centroid_mean': 0
        }
170
+
171
+ # ============================================
172
+ # 4. LANGUAGE DETECTION & VALIDATION
173
+ # ============================================
174
+
175
def validate_hindi_text(text):
    """
    Check that *text* contains enough Hindi (Devanagari) content.

    Hinglish is accepted: at least 20% of the non-whitespace characters
    must fall in the Devanagari Unicode block (U+0900–U+097F).

    Returns:
        (is_valid, message, hindi_ratio) — bool verdict, human-readable
        reason, and the fraction of Devanagari characters.
    """
    devanagari = re.compile(r'[\u0900-\u097F]')

    visible_chars = re.findall(r'\S', text)  # every non-whitespace character
    if not visible_chars:
        return False, "Empty transcription", 0

    hindi_ratio = len(devanagari.findall(text)) / len(visible_chars)

    # Reject mostly-English/Roman transcriptions (< 20% Devanagari).
    if hindi_ratio < 0.2:
        return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}% Hindi)", hindi_ratio

    return True, "Valid Hindi/Hinglish", hindi_ratio
197
+
198
def transliterate_to_hindi(text):
    """
    Placeholder for Roman-script → Devanagari transliteration.

    Currently a no-op that returns the input unchanged. In production,
    wire in a dedicated library such as indic-transliteration or
    aksharamukha.
    """
    return text
206
+
207
+ # ============================================
208
+ # 5. ENHANCED SENTIMENT ANALYSIS
209
+ # ============================================
210
+
211
def detect_negation(text):
    """
    Detect negation words/phrases that might flip sentiment.

    Matches whole words rather than substrings: the original substring
    check made single-character negators like 'न' fire inside almost any
    Hindi word (e.g. 'नमस्ते'), wrongly flipping sentiment.

    Returns:
        True if a negation word or phrase is present.
    """
    negation_words = {
        'नहीं', 'न', 'मत', 'नही', 'ना',          # Hindi
        'not', 'no', 'never', 'neither', 'nor',  # English
    }
    negation_phrases = ['कभी नहीं', 'बिल्कुल नहीं']

    text_lower = text.lower()

    # Multi-word phrases are safe to check as substrings.
    if any(phrase in text_lower for phrase in negation_phrases):
        return True

    # Single words must match a whole token (punctuation stripped).
    tokens = re.findall(r'\S+', text_lower)
    return any(token.strip('।,.!?;:"\'') in negation_words for token in tokens)
226
+
227
def detect_mixed_emotions(text, prosodic_features):
    """
    Decide whether the utterance carries mixed emotions.

    Combines textual cues (hedging words, co-occurring positive and
    negative emotion words) with prosodic cues (high pitch and energy
    variation from extract_prosodic_features).

    Returns:
        True if either signal indicates mixed emotions.
    """
    lowered = text.lower()

    # Hedging / contrast markers in Hindi and English.
    mixed_indicators = [
        'कभी', 'कभी कभी', 'sometimes',
        'लेकिन', 'पर', 'मगर', 'but', 'however',
        'या', 'or',
        'समझ नहीं', 'confus', 'don\'t know', 'पता नहीं',
        'शायद', 'maybe', 'perhaps'
    ]

    positive_words = ['खुश', 'प्यार', 'अच्छा', 'बढ़िया', 'मज़ा', 'happy', 'love', 'good', 'nice']
    negative_words = ['दुख', 'रो', 'गुस्सा', 'बुरा', 'परेशान', 'sad', 'cry', 'angry', 'bad', 'upset']

    def contains_any(words):
        # Substring scan over the lowered utterance.
        return any(word in lowered for word in words)

    has_mixed_indicators = contains_any(mixed_indicators)
    has_positive = contains_any(positive_words)
    has_negative = contains_any(negative_words)

    # Prosody: lots of pitch/energy movement suggests emotional conflict.
    high_pitch_variation = prosodic_features['pitch_std'] > 30
    high_energy_variation = prosodic_features['energy_std'] > 0.05

    text_mixed = has_mixed_indicators or (has_positive and has_negative)
    audio_mixed = high_pitch_variation and high_energy_variation

    is_mixed = text_mixed or audio_mixed

    if is_mixed:
        print(f"🔄 Mixed emotions detected: Text={text_mixed}, Audio={audio_mixed}")

    return is_mixed
264
+
265
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
    """
    Combine text-model sentiment scores with prosodic cues.

    Adjustment order matters: negation flip → mixed-emotion neutral
    boost → prosody-based confidence shaping → renormalization.
    (Removed the unused `initial_confidence` local from the original.)

    Args:
        text: Transcribed utterance.
        prosodic_features: Dict produced by extract_prosodic_features().
        raw_results: HF text-classification pipeline output
            (list whose first element is [{'label', 'score'}, ...]).

    Returns:
        (scores, confidence, is_mixed): normalized {sentiment: prob}
        dict, the top probability, and the mixed-emotion flag.
    """
    # Map model-specific labels onto canonical sentiment names. Covers
    # both cardiffnlp-style lowercase labels and generic LABEL_n names.
    sentiment_scores = {}
    label_mapping = {
        'negative': 'Negative',
        'neutral': 'Neutral',
        'positive': 'Positive',
        'LABEL_0': 'Negative',
        'LABEL_1': 'Neutral',
        'LABEL_2': 'Positive'
    }

    for result in raw_results[0]:
        label = result['label'].lower()
        mapped_label = label_mapping.get(label, label_mapping.get(result['label'], 'Neutral'))
        sentiment_scores[mapped_label] = result['score']

    # Ensure all three sentiments exist even if the model omitted one.
    for sentiment in ['Negative', 'Neutral', 'Positive']:
        if sentiment not in sentiment_scores:
            sentiment_scores[sentiment] = 0.0

    # 1. Negation flips polarity: swap positive and negative mass.
    has_negation = detect_negation(text)
    if has_negation:
        print("🔄 Negation detected - adjusting sentiment")
        sentiment_scores['Positive'], sentiment_scores['Negative'] = (
            sentiment_scores['Negative'], sentiment_scores['Positive'])

    # 2. Mixed emotions: pull mass toward Neutral, away from the extremes.
    is_mixed = detect_mixed_emotions(text, prosodic_features)
    if is_mixed:
        print("🔄 Mixed emotions detected - boosting neutral")
        neutral_boost = 0.25
        sentiment_scores['Neutral'] = min(0.7, sentiment_scores['Neutral'] + neutral_boost)
        sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
        sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)

    # 3. Prosody shaping: strong voice dynamics amplify the dominant
    #    polarity; a flat, quiet delivery favors Neutral. Thresholds are
    #    heuristic — tune against real recordings.
    if prosodic_features['pitch_std'] > 40 and prosodic_features['energy_mean'] > 0.1:
        print("🎵 Strong emotional prosody detected")
        if sentiment_scores['Positive'] > sentiment_scores['Negative']:
            sentiment_scores['Positive'] = min(0.9, sentiment_scores['Positive'] * 1.15)
        else:
            sentiment_scores['Negative'] = min(0.9, sentiment_scores['Negative'] * 1.15)
        sentiment_scores['Neutral'] = max(0.05, sentiment_scores['Neutral'] * 0.85)

    elif prosodic_features['energy_mean'] < 0.03 and prosodic_features['pitch_std'] < 15:
        print("🎵 Calm/neutral prosody detected")
        sentiment_scores['Neutral'] = min(0.8, sentiment_scores['Neutral'] * 1.2)

    # 4. Renormalize so the adjusted scores form a probability distribution.
    total = sum(sentiment_scores.values())
    if total > 0:
        sentiment_scores = {k: v/total for k, v in sentiment_scores.items()}

    final_confidence = max(sentiment_scores.values())

    return sentiment_scores, final_confidence, is_mixed
337
+
338
+ # ============================================
339
+ # 6. MAIN PREDICTION FUNCTION
340
+ # ============================================
341
+
342
def predict(audio_filepath):
    """
    Run the full speech → sentiment pipeline for one uploaded clip.

    Steps: input guard → audio preprocessing + prosody extraction →
    ASR transcription → Hindi-content validation → prosody-aware
    sentiment analysis → result formatting for gr.Label.

    Args:
        audio_filepath: Path to the recorded/uploaded audio, or None.

    Returns:
        dict for gr.Label: sentiment probabilities plus metadata rows,
        or a single error entry on failure (never raises to Gradio).
    """
    try:
        print(f"\n{'='*60}")
        print(f"🎧 Processing audio file...")

        # Gradio passes None when nothing was recorded/uploaded.
        if audio_filepath is None:
            print("❌ No audio file provided")
            return {
                "⚠️ Error": 1.0,
                "Message": "No audio file uploaded"
            }

        print(f"📂 File: {audio_filepath}")

        # STEP 1: Audio preprocessing (trim/normalize/denoise) + prosody.
        try:
            audio_processed, sr = preprocess_audio(audio_filepath)
            prosodic_features = extract_prosodic_features(audio_processed, sr)
        except Exception as e:
            # Fall back to raw audio with neutral prosody defaults so the
            # sentiment stage still runs.
            print(f"⚠️ Preprocessing error: {e}, using raw audio")
            audio_processed, sr = librosa.load(audio_filepath, sr=16000)
            prosodic_features = {
                'pitch_std': 0, 'energy_mean': 0, 'energy_std': 0,
                'pitch_mean': 0, 'pitch_range': 0, 'speech_rate': 0,
                'spectral_centroid_mean': 0
            }

        # STEP 2: Speech-to-text on the preprocessed audio.
        print("🔄 Transcribing audio with IndicWhisper...")
        try:
            import os
            import tempfile
            import soundfile as sf

            # The ASR pipeline wants a file path, so write the processed
            # samples to a temp WAV. delete=False because the pipeline
            # re-opens the path after the context manager closes it.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
                sf.write(temp_audio.name, audio_processed, sr)
                temp_audio_path = temp_audio.name

            try:
                result = asr_pipeline(
                    temp_audio_path,
                    generate_kwargs={
                        "language": "hindi",
                        "task": "transcribe"
                    }
                )
            finally:
                # Always remove the temp file — the original leaked it
                # whenever the ASR call raised.
                os.unlink(temp_audio_path)

            transcription = result["text"].strip()
            print(f"📝 Raw transcription: '{transcription}'")

        except Exception as asr_error:
            print(f"❌ ASR Error: {asr_error}")
            return {
                "⚠️ ASR Error": 1.0,
                "Message": str(asr_error)
            }

        # STEP 3: Validate the transcription before spending sentiment work.
        if not transcription or len(transcription) < 2:
            print("⚠️ Empty or too short transcription")
            return {
                "⚠️ No Speech Detected": 1.0,
                "Transcription": transcription or "Empty"
            }

        is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
        print(f"🔍 Language validation: {validation_msg} ({hindi_ratio*100:.1f}% Hindi)")

        if not is_valid:
            return {
                "⚠️ Language Error": 1.0,
                "Message": validation_msg,
                "Transcription": transcription
            }

        # STEP 4: Sentiment analysis (model scores + prosody adjustments).
        print("💭 Analyzing sentiment with XLM-RoBERTa...")
        try:
            raw_sentiment = sentiment_pipeline(transcription)
            print(f"📊 Raw sentiment: {raw_sentiment}")

            sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
                transcription,
                prosodic_features,
                raw_sentiment
            )

            # STEP 5: Format results for gr.Label, highest score first.
            result_dict = {}
            for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
                result_dict[f"{sentiment}"] = float(score)

            # Metadata rows shown alongside the sentiment bars.
            result_dict["📝 Transcription"] = transcription
            result_dict["🎯 Confidence"] = float(confidence)
            result_dict["🔀 Mixed Emotions"] = "Yes" if is_mixed else "No"
            result_dict["🌐 Hindi Content"] = f"{hindi_ratio*100:.0f}%"

            # Log results (the original success line was mojibake "���").
            print(f"✅ Analysis complete!")
            print(f"📝 Transcription: '{transcription}'")
            print(f"🎯 Confidence: {confidence:.3f}")
            print(f"🔀 Mixed: {is_mixed}")
            for sentiment, score in sentiment_scores.items():
                print(f" {sentiment}: {score:.3f}")
            print(f"{'='*60}\n")

            return result_dict

        except Exception as sentiment_error:
            print(f"❌ Sentiment Error: {sentiment_error}")
            return {
                "⚠️ Sentiment Error": 1.0,
                "Message": str(sentiment_error),
                "Transcription": transcription
            }

    except Exception as e:
        # Last-resort guard: report instead of crashing the Gradio worker.
        print(f"❌ Critical Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return {
            "⚠️ System Error": 1.0,
            "Message": str(e)
        }
488
+
489
+ # ============================================
490
+ # 7. GRADIO INTERFACE
491
+ # ============================================
492
 
 
493
# Gradio UI: single audio input → label output driven by predict().
demo = gr.Interface(
    fn=predict,  # synchronous — Gradio handles request queuing
    inputs=gr.Audio(
        type="filepath",
        label="🎤 Record or Upload Hindi Audio",
        sources=["upload", "microphone"]
    ),
    outputs=gr.Label(
        label="🎭 Enhanced Sentiment Analysis Results",
        num_top_classes=10  # room for 3 sentiments + metadata rows
    ),
    title="🎤 Advanced Hindi Speech Sentiment Analysis",
    description="""
## 🇮🇳 Professional-grade Hindi/Hinglish Speech Emotion Analysis

### Advanced Features:
- **🎙️ IndicWhisper ASR** - Best-in-class Hindi transcription
- **🧠 XLM-RoBERTa** - Multilingual sentiment analysis
- **🎵 Prosodic Analysis** - Voice tone, pitch, energy detection
- **🔄 Mixed Emotion Detection** - Handles complex feelings
- **🌐 Hinglish Support** - Works with Hindi + English mix
- **🎯 Confidence Scoring** - Know how reliable the prediction is
- **🔧 Audio Preprocessing** - Noise reduction, normalization

### 🧪 Test Examples:
- **😊 Positive**: "मैं बहुत खुश हूं आज" *(I'm very happy today)*
- **😢 Negative**: "मुझे बहुत दुख हो रहा है" *(I'm feeling very sad)*
- **😐 Neutral**: "मैं घर जा रहा हूं" *(I'm going home)*
- **🔀 Mixed**: "कभी खुश हूं कभी उदास" *(Sometimes happy, sometimes sad)*
- **💭 Confused**: "समझ नहीं आ रहा क्या करूं" *(Don't understand what to do)*
- **🗣️ Hinglish**: "I'm feeling बहुत अच्छा today" *(Mix of languages)*

### 📊 Output Includes:
- Sentiment probabilities (Positive/Negative/Neutral)
- Exact transcription in Hindi/Devanagari
- Confidence score (how sure the model is)
- Mixed emotion indicator
- Language composition (% Hindi content)

### 💡 Best Practices:
1. Speak clearly for 3-10 seconds
2. Reduce background noise if possible
3. Use natural conversational tone
4. Both Hindi and Hinglish are supported

### 🎯 Use Cases:
- Mental health tracking
- Customer feedback analysis
- Call center quality monitoring
- Personal diary analysis
- Relationship counseling
    """,
    examples=None,
    theme=gr.themes.Soft(),
    # Only the modern kwarg: the original also passed the deprecated
    # `allow_flagging="never"`, which was removed in Gradio 5 and makes
    # gr.Interface raise a TypeError there.
    flagging_mode="never"
)
550
 
551
+ # ============================================
552
+ # 8. LAUNCH APP
553
+ # ============================================
554
+
555
  if __name__ == "__main__":
556
  print("🌐 Starting server...")
557
  demo.launch(
 
559
  server_port=7860,
560
  show_error=True
561
  )
562
+ print("🎉 Enhanced Hindi Sentiment Analysis App is ready!")