Spaces:

JustNikunj
/

Sentimental_Analysis

Sleeping

App Files Files Community

JustNikunj committed on Sep 11, 2025

Commit

9e93862

verified ·

1 Parent(s): 6a78c4f

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -110

app.py CHANGED Viewed

@@ -1,153 +1,156 @@
 import gradio as gr
 import torch
-import torchaudio
-from transformers import AutoModelForCTC, AutoProcessor, pipeline
-from pydub import AudioSegment
-import numpy as np
 import librosa
-import io
-import tempfile
-# Load ASR model and processor for Hindi speech recognition
-print("Loading ASR model...")
 try:
-    # Try to load the Hindi model with language modeling
-    from transformers import Wav2Vec2ProcessorWithLM
-    asr_processor = Wav2Vec2ProcessorWithLM.from_pretrained("ai4bharat/indicwav2vec-hindi")
-    asr_model = AutoModelForCTC.from_pretrained("ai4bharat/indicwav2vec-hindi")
-    print("Loaded Hindi model with language modeling")
 except Exception as e:
-    print(f"Failed to load Hindi model with LM: {e}")
-    print("Falling back to basic processor...")
-    # Fallback to basic processor
-    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
-    asr_processor = Wav2Vec2Processor.from_pretrained("ai4bharat/indicwav2vec-hindi")
-    asr_model = Wav2Vec2ForCTC.from_pretrained("ai4bharat/indicwav2vec-hindi")
-    print("Loaded Hindi model with basic processor")
-# Load sentiment analysis pipeline for Hindi text
-print("Loading sentiment analysis model...")
-sentiment_pipeline = pipeline(
-    "text-classification",
-    model="LondonStory/txlm-roberta-hindi-sentiment",
-    return_all_scores=True
-)
-# Move models to appropriate device (CPU for free Hugging Face Space)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_model.to(device)
-print(f"Models loaded on device: {device}")
 def predict(audio_filepath):
     """
-    Main prediction function that processes Hindi audio and returns sentiment analysis.
-    Args:
-        audio_filepath: Path to the uploaded audio file
-    Returns:
-        Dictionary with sentiment labels and confidence scores
     """
     try:
-        # Load and preprocess audio
-        print(f"Processing audio file: {audio_filepath}")
-        # Load audio using librosa and resample to 16kHz as required by the ASR model
-        audio_array, sample_rate = librosa.load(audio_filepath, sr=16000)
-        # Ensure audio is in the correct format
-        if len(audio_array.shape) > 1:
-            audio_array = np.mean(audio_array, axis=1)
-        # Process audio with ASR processor
-        inputs = asr_processor(
-            audio_array,
-            sampling_rate=16000,
-            return_tensors="pt",
-            padding=True
-        )
-        # Move inputs to device
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-        # Transcribe audio to Hindi text
-        with torch.no_grad():
-            logits = asr_model(**inputs).logits
-        # Get predicted token IDs
-        predicted_ids = torch.argmax(logits, dim=-1)
-        # Decode the transcription
-        transcription = asr_processor.batch_decode(predicted_ids)[0]
-        print(f"Transcribed text: {transcription}")
-        # Handle empty transcription
-        if not transcription.strip():
-            print("Empty transcription detected")
-            return {"No Speech Detected": 1.0}
-        # Perform sentiment analysis on the transcribed text
-        sentiment_results = sentiment_pipeline(transcription)
-        # Format results for Gradio
-        result_dict = {}
-        for result in sentiment_results[0]:
-            label = result['label']
-            score = result['score']
-            result_dict[label] = float(score)
-        # Add transcription info (but not as a score since Gradio Label expects numbers)
-        print(f"Successfully processed. Transcription: {transcription}")
-        return result_dict
     except Exception as e:
-        print(f"Error processing audio: {str(e)}")
-        # Return a properly formatted error response for Gradio
         return {"Processing Error": 1.0}
 # Create Gradio interface
 demo = gr.Interface(
     fn=predict,
     inputs=gr.Audio(
-        type="filepath",
-        label="Upload Hindi Speech",
         sources=["upload", "microphone"]
     ),
     outputs=gr.Label(
-        label="Sentiment Analysis Result",
-        num_top_classes=3
     ),
-    title="🎤 Hindi Speech Sentiment Analysis",
     description="""
-    ### Upload or record Hindi audio to analyze sentiment
-    This app performs the following steps:
-    1. **Speech Recognition**: Converts your Hindi speech to text using AI4Bharat's IndicWav2Vec model
-    2. **Sentiment Analysis**: Analyzes the emotional tone using a specialized Hindi sentiment model
-    **Instructions**:
-    - Upload an audio file or record directly using the microphone
-    - Speak clearly in Hindi for best results
-    - The results show sentiment confidence scores
-    - Check the logs below to see the transcribed text
-    **Supported sentiments**: Positive, Negative, and Neutral with confidence scores
-    **Test phrases**: Try "मैं बहुत खुश हूं" (positive) or "मुझे यह पसंद नहीं है" (negative)
     """,
     examples=None,
     theme=gr.themes.Soft(),
-    allow_flagging="never"
 )
 # Launch the app
 if __name__ == "__main__":
-    # Launch with share=True for public access, queue for handling multiple requests
     demo.launch(
-        share=False,  # Set to True if you want a public link for testing
-        server_name="0.0.0.0",  # Required for Hugging Face Spaces
-        server_port=7860,  # Default port for Hugging Face Spaces
         show_error=True
     )

 import gradio as gr
 import torch
+from transformers import pipeline
 import librosa
+import numpy as np
+print("🚀 Starting Hindi Speech Sentiment Analysis App...")
+# Load sentiment analysis model
+print("📚 Loading sentiment analysis model...")
 # Both pipelines are loaded once at import time. If a load fails, the name is
 # still bound (to None) so later use fails predictably inside predict()'s
 # error handling instead of raising NameError.
 try:
     sentiment_pipeline = pipeline(
         "text-classification",
         model="LondonStory/txlm-roberta-hindi-sentiment",
         top_k=None,  # ask for a score for every label, not only the best one
     )
     print("✅ Sentiment model loaded successfully")
 except Exception as e:
     print(f"❌ Error loading sentiment model: {e}")
     sentiment_pipeline = None
 # Use a simpler, more reliable ASR approach with Whisper
 print("🎤 Loading Whisper ASR model...")
 try:
     # Use OpenAI Whisper for more reliable transcription.
     # Language and task are pinned so Hindi speech is transcribed as Hindi
     # rather than left to Whisper's automatic language detection.
     asr_pipeline = pipeline(
         "automatic-speech-recognition",
         model="openai/whisper-small",
         chunk_length_s=30,
         device="cpu",
         generate_kwargs={"language": "hindi", "task": "transcribe"},
     )
     print("✅ Whisper ASR model loaded successfully")
 except Exception as e:
     print(f"❌ Error loading Whisper model: {e}")
     # Last-resort fallback so the app can still start.
     # NOTE(review): facebook/wav2vec2-base-960h is an English model, so Hindi
     # transcripts from this path will be poor - consider a Hindi checkpoint.
     try:
         asr_pipeline = pipeline(
             "automatic-speech-recognition",
             model="facebook/wav2vec2-base-960h",
             device="cpu",
         )
         print("✅ Fallback ASR model loaded successfully")
     except Exception as e2:
         print(f"❌ Error loading fallback model: {e2}")
         asr_pipeline = None
 def _scores_by_label(sentiment_results):
     """Flatten text-classification pipeline output into ``{label: score}``.

     Accepts a single ``{"label", "score"}`` dict, a flat list of such dicts,
     or the batch-nested form ``[[{...}, ...]]``.
     """
     entries = sentiment_results
     if isinstance(entries, dict):
         entries = [entries]
     # Unwrap batch nesting: [[{...}, ...]] -> [{...}, ...]
     while entries and isinstance(entries[0], (list, tuple)):
         entries = entries[0]
     return {entry["label"]: float(entry["score"]) for entry in entries}


 def predict(audio_filepath):
     """
     Transcribe an audio file and return sentiment scores for the transcript.

     Args:
         audio_filepath: Path to the audio file to analyse, or None when no
             audio was provided.

     Returns:
         A dict mapping each sentiment label to its score as a float. Every
         failure mode (no audio, empty transcript, ASR error, sentiment
         error, unexpected error) is reported as a dict holding a single
         descriptive key with the value 1.0; no exception is propagated.
     """
     try:
         print(f"\n{'='*50}")
         print("🎧 Processing new audio file...")
         if audio_filepath is None:
             print("❌ No audio file provided")
             return {"⚠️ No Audio": 1.0}
         print(f"📂 File path: {audio_filepath}")
         # Transcribe audio using Whisper
         print("🔄 Transcribing audio with Whisper...")
         try:
             asr_output = asr_pipeline(audio_filepath)
             transcription = asr_output["text"].strip()
         except Exception as asr_error:
             print(f"❌ Whisper ASR Error: {asr_error}")
             return {"ASR Error": 1.0}
         print(f"📝 Whisper transcription: '{transcription}'")
         # Handle empty transcription
         if not transcription:
             print("⚠️ Empty transcription from Whisper")
             return {"No Speech": 1.0}
         # Perform sentiment analysis
         print("💭 Analyzing sentiment with LondonStory model...")
         try:
             sentiment_results = sentiment_pipeline(transcription)
             print(f"📊 Raw sentiment results: {sentiment_results}")
             # The pipeline may wrap the per-label scores in an extra list
             # for a single input; the helper unwraps that before formatting.
             result_dict = _scores_by_label(sentiment_results)
         except Exception as sentiment_error:
             print(f"❌ Sentiment Analysis Error: {sentiment_error}")
             return {"Sentiment Error": 1.0}
         # Log success details
         print("✅ SUCCESS! Processing completed")
         print(f"📝 Final transcription: '{transcription}'")
         for label, score in result_dict.items():
             print(f"📊 {label}: {score:.3f}")
         print(f"{'='*50}\n")
         return result_dict
     except Exception as e:
         # Top-level boundary: keep the UI responsive whatever went wrong.
         print(f"❌ General Error: {str(e)}")
         return {"Processing Error": 1.0}
 # Create Gradio interface
 # Input widget: accepts an uploaded file or a microphone recording and passes
 # it to predict() as a file path.
 _AUDIO_INPUT = gr.Audio(
     type="filepath",
     label="🎤 Record or Upload Hindi Audio",
     sources=["upload", "microphone"],
 )
 # Output widget: shows the label -> score mapping returned by predict().
 _SENTIMENT_OUTPUT = gr.Label(
     label="🎭 Sentiment Analysis Results",
     num_top_classes=5,
 )
 # Markdown shown under the title on the app page.
 _DESCRIPTION = """
     ## 🇮🇳 Analyze sentiment from Hindi speech using Whisper AI
     ### 🔄 How it works:
     1. **🎤 Whisper ASR** → Converts your Hindi speech to Devanagari text
     2. **💭 LondonStory AI** → Analyzes sentiment with confidence scores
     ### 🧪 Test Phrases (speak clearly):
     - **😊 Happy**: "मैं बहुत खुश हूं" *(Main bahut khush hun)*
     - **😠 Sad**: "मुझे दुख है" *(Mujhe dukh hai)*
     - **😐 Neutral**: "यह ठीक है" *(Yeh theek hai)*
     - **❤️ Love**: "मुझे यह पसंद है" *(Mujhe yeh pasand hai)*
     - **👎 Dislike**: "यह अच्छा नहीं है" *(Yeh accha nahi hai)*
     ### 📋 Instructions:
     1. Click the microphone to record or upload an audio file
     2. Speak clearly in Hindi for 3-5 seconds
     3. Click Submit and check results + logs below
     ### 🔍 Features:
     - **Powered by OpenAI Whisper** for accurate Hindi transcription
     - **Specialized Hindi sentiment model** for emotion analysis
     - **Real-time processing** with detailed logging
     - **Supports various Hindi accents** and speaking styles
     """
 demo = gr.Interface(
     fn=predict,
     inputs=_AUDIO_INPUT,
     outputs=_SENTIMENT_OUTPUT,
     title="🎤 Hindi Speech Sentiment Analysis (Whisper + AI)",
     description=_DESCRIPTION,
     examples=None,
     theme=gr.themes.Soft(),
     flagging_mode="never",
 )
 # Launch the app
 if __name__ == "__main__":
     # demo.launch() blocks until the server is shut down, so anything printed
     # after it would only show up at exit. Announce readiness first.
     print("🎉 Whisper + Hindi Sentiment Analysis App is ready!")
     print("🌐 Starting server...")
     demo.launch(
         server_name="0.0.0.0",  # listen on all interfaces
         server_port=7860,
         show_error=True,
     )