JustNikunj committed on
Commit
6b8f285
·
verified ·
1 Parent(s): 38f9319

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -85
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
4
  import librosa
5
  import numpy as np
6
  import re
@@ -12,54 +12,73 @@ warnings.filterwarnings('ignore')
12
  print("๐Ÿš€ Starting Enhanced Hindi Speech Sentiment Analysis App...")
13
 
14
  # ============================================
15
- # 1. LOAD MODELS
16
  # ============================================
17
 
18
- # Load Hindi Sentiment Model
19
- print("๐Ÿ“š Loading Hindi sentiment analysis model...")
20
- try:
21
- # Use LondonStory's Hindi sentiment model
22
- sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
23
- sentiment_pipeline = pipeline(
24
- "text-classification",
25
- model=sentiment_model_name,
26
- top_k=None
27
- )
28
- print("โœ… Hindi sentiment model loaded successfully")
29
- except Exception as e:
30
- print(f"โŒ Error loading sentiment model: {e}")
31
- raise
32
 
33
- # Load IndicWhisper for Hindi ASR (Best for Indian languages)
34
- print("๐ŸŽค Loading IndicWhisper Hindi ASR model...")
35
- try:
36
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
37
-
38
- asr_processor = AutoProcessor.from_pretrained("vasista22/whisper-hindi-medium")
39
- asr_model = AutoModelForSpeechSeq2Seq.from_pretrained("vasista22/whisper-hindi-medium")
40
-
41
- # Create pipeline with the loaded model
42
- asr_pipeline = pipeline(
43
- "automatic-speech-recognition",
44
- model=asr_model,
45
- tokenizer=asr_processor.tokenizer,
46
- feature_extractor=asr_processor.feature_extractor,
47
- device="cpu",
48
- chunk_length_s=30
49
- )
50
- print("โœ… IndicWhisper Hindi ASR model loaded successfully")
51
- except Exception as e:
52
- print(f"โŒ Error loading IndicWhisper, trying fallback: {e}")
53
  try:
54
- asr_pipeline = pipeline(
55
- "automatic-speech-recognition",
56
- model="openai/whisper-small",
57
- device="cpu"
 
58
  )
59
- print("โœ… Whisper-small fallback loaded successfully")
60
- except Exception as e2:
61
- print(f"โŒ Error loading any ASR model: {e2}")
62
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # ============================================
65
  # 2. AUDIO PREPROCESSING FUNCTIONS
@@ -70,8 +89,6 @@ def preprocess_audio(audio_path, target_sr=16000):
70
  Advanced audio preprocessing for better ASR accuracy
71
  """
72
  try:
73
- print("๐Ÿ”ง Preprocessing audio...")
74
-
75
  # Load audio
76
  audio, sr = librosa.load(audio_path, sr=target_sr, mono=True)
77
 
@@ -89,8 +106,6 @@ def preprocess_audio(audio_path, target_sr=16000):
89
  # 4. Apply noise reduction using spectral gating
90
  audio_denoised = reduce_noise(audio_emphasized, sr)
91
 
92
- print(f"โœ… Audio preprocessed: {len(audio)//sr}s โ†’ {len(audio_denoised)//sr}s (after trim)")
93
-
94
  return audio_denoised, sr
95
 
96
  except Exception as e:
@@ -162,8 +177,6 @@ def extract_prosodic_features(audio, sr):
162
  spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
163
  features['spectral_centroid_mean'] = np.mean(spectral_centroid)
164
 
165
- print(f"๐ŸŽต Prosodic features: Pitch STD={features['pitch_std']:.1f}, Energy={features['energy_mean']:.3f}")
166
-
167
  return features
168
 
169
  except Exception as e:
@@ -245,7 +258,7 @@ def detect_mixed_emotions(text, prosodic_features):
245
  has_negative = any(word in text_lower for word in negative_words)
246
 
247
  # Prosodic indicators of mixed emotions
248
- high_pitch_variation = prosodic_features['pitch_std'] > 30 # High variation suggests uncertainty
249
  high_energy_variation = prosodic_features['energy_std'] > 0.05
250
 
251
  # Combine signals
@@ -254,16 +267,13 @@ def detect_mixed_emotions(text, prosodic_features):
254
 
255
  is_mixed = text_mixed or audio_mixed
256
 
257
- if is_mixed:
258
- print(f"๐Ÿ”„ Mixed emotions detected: Text={text_mixed}, Audio={audio_mixed}")
259
-
260
  return is_mixed
261
 
262
  def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
263
  """
264
  Enhanced sentiment analysis combining text and prosodic features
265
  """
266
- # Parse raw results - handle different model formats
267
  sentiment_scores = {}
268
 
269
  # Check if results are in the expected format
@@ -299,7 +309,6 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
299
  has_negation = detect_negation(text)
300
  if has_negation:
301
  print("๐Ÿ”„ Negation detected - adjusting sentiment")
302
- # Swap positive and negative scores
303
  temp = sentiment_scores['Positive']
304
  sentiment_scores['Positive'] = sentiment_scores['Negative']
305
  sentiment_scores['Negative'] = temp
@@ -308,24 +317,20 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
308
  is_mixed = detect_mixed_emotions(text, prosodic_features)
309
  if is_mixed:
310
  print("๐Ÿ”„ Mixed emotions detected - boosting neutral")
311
- # Boost neutral, reduce extremes
312
  neutral_boost = 0.25
313
  sentiment_scores['Neutral'] = min(0.7, sentiment_scores['Neutral'] + neutral_boost)
314
  sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
315
  sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
316
 
317
  # 3. Use prosodic features to adjust confidence
318
- # High pitch variation + high energy = strong emotion
319
  if prosodic_features['pitch_std'] > 40 and prosodic_features['energy_mean'] > 0.1:
320
  print("๐ŸŽต Strong emotional prosody detected")
321
- # Increase confidence in non-neutral sentiments
322
  if sentiment_scores['Positive'] > sentiment_scores['Negative']:
323
  sentiment_scores['Positive'] = min(0.9, sentiment_scores['Positive'] * 1.15)
324
  else:
325
  sentiment_scores['Negative'] = min(0.9, sentiment_scores['Negative'] * 1.15)
326
  sentiment_scores['Neutral'] = max(0.05, sentiment_scores['Neutral'] * 0.85)
327
 
328
- # Low energy + low pitch variation = neutral/calm
329
  elif prosodic_features['energy_mean'] < 0.03 and prosodic_features['pitch_std'] < 15:
330
  print("๐ŸŽต Calm/neutral prosody detected")
331
  sentiment_scores['Neutral'] = min(0.8, sentiment_scores['Neutral'] * 1.2)
@@ -346,7 +351,7 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
346
 
347
  def predict(audio_filepath):
348
  """
349
- Main prediction function with comprehensive error handling
350
  """
351
  try:
352
  print(f"\n{'='*60}")
@@ -354,14 +359,11 @@ def predict(audio_filepath):
354
 
355
  # Validation
356
  if audio_filepath is None:
357
- print("โŒ No audio file provided")
358
  return {
359
  "โš ๏ธ Error": 1.0,
360
  "Message": "No audio file uploaded"
361
  }
362
 
363
- print(f"๐Ÿ“‚ File: {audio_filepath}")
364
-
365
  # ============================================
366
  # STEP 1: Audio Preprocessing
367
  # ============================================
@@ -378,12 +380,11 @@ def predict(audio_filepath):
378
  }
379
 
380
  # ============================================
381
- # STEP 2: Speech-to-Text (ASR)
382
  # ============================================
383
- print("๐Ÿ”„ Transcribing audio with Whisper...")
384
  try:
385
- # Transcribe with Hindi language setting
386
- result = asr_pipeline(
387
  audio_filepath,
388
  generate_kwargs={
389
  "language": "hindi",
@@ -392,7 +393,7 @@ def predict(audio_filepath):
392
  )
393
 
394
  transcription = result["text"].strip()
395
- print(f"๐Ÿ“ Raw transcription: '{transcription}'")
396
 
397
  except Exception as asr_error:
398
  print(f"โŒ ASR Error: {asr_error}")
@@ -405,14 +406,13 @@ def predict(audio_filepath):
405
  # STEP 3: Validate Transcription
406
  # ============================================
407
  if not transcription or len(transcription) < 2:
408
- print("โš ๏ธ Empty or too short transcription")
409
  return {
410
  "โš ๏ธ No Speech Detected": 1.0,
411
  "Transcription": transcription or "Empty"
412
  }
413
 
414
  is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
415
- print(f"๐Ÿ” Language validation: {validation_msg} ({hindi_ratio*100:.1f}% Hindi)")
416
 
417
  if not is_valid:
418
  return {
@@ -422,15 +422,12 @@ def predict(audio_filepath):
422
  }
423
 
424
  # ============================================
425
- # STEP 4: Sentiment Analysis
426
  # ============================================
427
- print("๐Ÿ’ญ Analyzing sentiment with XLM-RoBERTa...")
428
  try:
429
- # Get raw sentiment
430
- raw_sentiment = sentiment_pipeline(transcription)
431
- print(f"๐Ÿ“Š Raw sentiment: {raw_sentiment}")
432
 
433
- # Enhanced analysis
434
  sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
435
  transcription,
436
  prosodic_features,
@@ -442,23 +439,15 @@ def predict(audio_filepath):
442
  # ============================================
443
  result_dict = {}
444
 
445
- # Add sentiment scores
446
  for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
447
  result_dict[f"{sentiment}"] = float(score)
448
 
449
- # Add metadata
450
  result_dict["๐Ÿ“ Transcription"] = transcription
451
  result_dict["๐ŸŽฏ Confidence"] = float(confidence)
452
  result_dict["๐Ÿ”€ Mixed Emotions"] = "Yes" if is_mixed else "No"
453
  result_dict["๐ŸŒ Hindi Content"] = f"{hindi_ratio*100:.0f}%"
454
 
455
- # Log results
456
- print(f"โœ… Analysis complete!")
457
- print(f"๐Ÿ“ Transcription: '{transcription}'")
458
- print(f"๐ŸŽฏ Confidence: {confidence:.3f}")
459
- print(f"๐Ÿ”€ Mixed: {is_mixed}")
460
- for sentiment, score in sentiment_scores.items():
461
- print(f" {sentiment}: {score:.3f}")
462
  print(f"{'='*60}\n")
463
 
464
  return result_dict
@@ -507,6 +496,7 @@ demo = gr.Interface(
507
  - **๐ŸŒ Hinglish Support** - Works with Hindi + English mix
508
  - **๐ŸŽฏ Confidence Scoring** - Know how reliable the prediction is
509
  - **๐Ÿ”ง Audio Preprocessing** - Noise reduction, normalization
 
510
 
511
  ### ๐Ÿงช Test Examples:
512
  - **๐Ÿ˜Š Positive**: "เคฎเฅˆเค‚ เคฌเคนเฅเคค เค–เฅเคถ เคนเฅ‚เค‚ เค†เคœ" *(I'm very happy today)*
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
4
  import librosa
5
  import numpy as np
6
  import re
 
12
  print("๐Ÿš€ Starting Enhanced Hindi Speech Sentiment Analysis App...")
13
 
14
  # ============================================
15
+ # 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
16
  # ============================================
17
 
18
+ # Global variables to store loaded models
19
+ SENTIMENT_PIPELINE = None
20
+ ASR_PIPELINE = None
21
+ ASR_PROCESSOR = None
22
+ ASR_MODEL = None
 
 
 
 
 
 
 
 
 
23
 
24
+ def load_models():
25
+ """
26
+ Load all models once at startup and cache them globally
27
+ """
28
+ global SENTIMENT_PIPELINE, ASR_PIPELINE, ASR_PROCESSOR, ASR_MODEL
29
+
30
+ # Check if already loaded
31
+ if SENTIMENT_PIPELINE is not None and ASR_PIPELINE is not None:
32
+ print("โœ… Models already loaded, skipping...")
33
+ return
34
+
35
+ # Load Hindi Sentiment Model
36
+ print("๐Ÿ“š Loading Hindi sentiment analysis model...")
 
 
 
 
 
 
 
37
  try:
38
+ sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
39
+ SENTIMENT_PIPELINE = pipeline(
40
+ "text-classification",
41
+ model=sentiment_model_name,
42
+ top_k=None
43
  )
44
+ print("โœ… Hindi sentiment model loaded successfully")
45
+ except Exception as e:
46
+ print(f"โŒ Error loading sentiment model: {e}")
47
  raise
48
+
49
+ # Load IndicWhisper for Hindi ASR
50
+ print("๐ŸŽค Loading IndicWhisper Hindi ASR model...")
51
+ try:
52
+ ASR_PROCESSOR = AutoProcessor.from_pretrained("vasista22/whisper-hindi-medium")
53
+ ASR_MODEL = AutoModelForSpeechSeq2Seq.from_pretrained("vasista22/whisper-hindi-medium")
54
+
55
+ # Create pipeline with the loaded model
56
+ ASR_PIPELINE = pipeline(
57
+ "automatic-speech-recognition",
58
+ model=ASR_MODEL,
59
+ tokenizer=ASR_PROCESSOR.tokenizer,
60
+ feature_extractor=ASR_PROCESSOR.feature_extractor,
61
+ device="cpu",
62
+ chunk_length_s=30
63
+ )
64
+ print("โœ… IndicWhisper Hindi ASR model loaded successfully")
65
+ except Exception as e:
66
+ print(f"โŒ Error loading IndicWhisper, trying fallback: {e}")
67
+ try:
68
+ ASR_PIPELINE = pipeline(
69
+ "automatic-speech-recognition",
70
+ model="openai/whisper-small",
71
+ device="cpu"
72
+ )
73
+ print("โœ… Whisper-small fallback loaded successfully")
74
+ except Exception as e2:
75
+ print(f"โŒ Error loading any ASR model: {e2}")
76
+ raise
77
+
78
+ print("โœ… All models loaded and cached in memory")
79
+
80
+ # Load models at startup
81
+ load_models()
82
 
83
  # ============================================
84
  # 2. AUDIO PREPROCESSING FUNCTIONS
 
89
  Advanced audio preprocessing for better ASR accuracy
90
  """
91
  try:
 
 
92
  # Load audio
93
  audio, sr = librosa.load(audio_path, sr=target_sr, mono=True)
94
 
 
106
  # 4. Apply noise reduction using spectral gating
107
  audio_denoised = reduce_noise(audio_emphasized, sr)
108
 
 
 
109
  return audio_denoised, sr
110
 
111
  except Exception as e:
 
177
  spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
178
  features['spectral_centroid_mean'] = np.mean(spectral_centroid)
179
 
 
 
180
  return features
181
 
182
  except Exception as e:
 
258
  has_negative = any(word in text_lower for word in negative_words)
259
 
260
  # Prosodic indicators of mixed emotions
261
+ high_pitch_variation = prosodic_features['pitch_std'] > 30
262
  high_energy_variation = prosodic_features['energy_std'] > 0.05
263
 
264
  # Combine signals
 
267
 
268
  is_mixed = text_mixed or audio_mixed
269
 
 
 
 
270
  return is_mixed
271
 
272
  def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
273
  """
274
  Enhanced sentiment analysis combining text and prosodic features
275
  """
276
+ # Parse raw results
277
  sentiment_scores = {}
278
 
279
  # Check if results are in the expected format
 
309
  has_negation = detect_negation(text)
310
  if has_negation:
311
  print("๐Ÿ”„ Negation detected - adjusting sentiment")
 
312
  temp = sentiment_scores['Positive']
313
  sentiment_scores['Positive'] = sentiment_scores['Negative']
314
  sentiment_scores['Negative'] = temp
 
317
  is_mixed = detect_mixed_emotions(text, prosodic_features)
318
  if is_mixed:
319
  print("๐Ÿ”„ Mixed emotions detected - boosting neutral")
 
320
  neutral_boost = 0.25
321
  sentiment_scores['Neutral'] = min(0.7, sentiment_scores['Neutral'] + neutral_boost)
322
  sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
323
  sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
324
 
325
  # 3. Use prosodic features to adjust confidence
 
326
  if prosodic_features['pitch_std'] > 40 and prosodic_features['energy_mean'] > 0.1:
327
  print("๐ŸŽต Strong emotional prosody detected")
 
328
  if sentiment_scores['Positive'] > sentiment_scores['Negative']:
329
  sentiment_scores['Positive'] = min(0.9, sentiment_scores['Positive'] * 1.15)
330
  else:
331
  sentiment_scores['Negative'] = min(0.9, sentiment_scores['Negative'] * 1.15)
332
  sentiment_scores['Neutral'] = max(0.05, sentiment_scores['Neutral'] * 0.85)
333
 
 
334
  elif prosodic_features['energy_mean'] < 0.03 and prosodic_features['pitch_std'] < 15:
335
  print("๐ŸŽต Calm/neutral prosody detected")
336
  sentiment_scores['Neutral'] = min(0.8, sentiment_scores['Neutral'] * 1.2)
 
351
 
352
  def predict(audio_filepath):
353
  """
354
+ Main prediction function - uses pre-loaded global models
355
  """
356
  try:
357
  print(f"\n{'='*60}")
 
359
 
360
  # Validation
361
  if audio_filepath is None:
 
362
  return {
363
  "โš ๏ธ Error": 1.0,
364
  "Message": "No audio file uploaded"
365
  }
366
 
 
 
367
  # ============================================
368
  # STEP 1: Audio Preprocessing
369
  # ============================================
 
380
  }
381
 
382
  # ============================================
383
+ # STEP 2: Speech-to-Text (ASR) - Using cached model
384
  # ============================================
385
+ print("๐Ÿ”„ Transcribing with cached IndicWhisper model...")
386
  try:
387
+ result = ASR_PIPELINE(
 
388
  audio_filepath,
389
  generate_kwargs={
390
  "language": "hindi",
 
393
  )
394
 
395
  transcription = result["text"].strip()
396
+ print(f"๐Ÿ“ Transcription: '{transcription}'")
397
 
398
  except Exception as asr_error:
399
  print(f"โŒ ASR Error: {asr_error}")
 
406
  # STEP 3: Validate Transcription
407
  # ============================================
408
  if not transcription or len(transcription) < 2:
 
409
  return {
410
  "โš ๏ธ No Speech Detected": 1.0,
411
  "Transcription": transcription or "Empty"
412
  }
413
 
414
  is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
415
+ print(f"๐Ÿ” {validation_msg} ({hindi_ratio*100:.1f}% Hindi)")
416
 
417
  if not is_valid:
418
  return {
 
422
  }
423
 
424
  # ============================================
425
+ # STEP 4: Sentiment Analysis - Using cached model
426
  # ============================================
427
+ print("๐Ÿ’ญ Analyzing sentiment with cached model...")
428
  try:
429
+ raw_sentiment = SENTIMENT_PIPELINE(transcription)
 
 
430
 
 
431
  sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
432
  transcription,
433
  prosodic_features,
 
439
  # ============================================
440
  result_dict = {}
441
 
 
442
  for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
443
  result_dict[f"{sentiment}"] = float(score)
444
 
 
445
  result_dict["๐Ÿ“ Transcription"] = transcription
446
  result_dict["๐ŸŽฏ Confidence"] = float(confidence)
447
  result_dict["๐Ÿ”€ Mixed Emotions"] = "Yes" if is_mixed else "No"
448
  result_dict["๐ŸŒ Hindi Content"] = f"{hindi_ratio*100:.0f}%"
449
 
450
+ print(f"โœ… Complete! Confidence: {confidence:.3f}")
 
 
 
 
 
 
451
  print(f"{'='*60}\n")
452
 
453
  return result_dict
 
496
  - **๐ŸŒ Hinglish Support** - Works with Hindi + English mix
497
  - **๐ŸŽฏ Confidence Scoring** - Know how reliable the prediction is
498
  - **๐Ÿ”ง Audio Preprocessing** - Noise reduction, normalization
499
+ - **โšก Cached Models** - Fast predictions after first load
500
 
501
  ### ๐Ÿงช Test Examples:
502
  - **๐Ÿ˜Š Positive**: "เคฎเฅˆเค‚ เคฌเคนเฅเคค เค–เฅเคถ เคนเฅ‚เค‚ เค†เคœ" *(I'm very happy today)*