Natwar committed on
Commit
0bc2de3
·
verified ·
1 Parent(s): f44ef6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -45
app.py CHANGED
@@ -1,56 +1,286 @@
1
  import subprocess
2
  import sys
 
3
 
4
- # Install required packages
5
- subprocess.check_call([sys.executable, "-m", "pip", "install",
6
- "gradio==3.50.2", "matplotlib", "numpy", "torch", "transformers"])
 
 
 
 
 
 
 
 
 
7
 
8
- import gradio as gr
9
- import matplotlib.pyplot as plt
10
- import numpy as np
11
  import torch
12
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Define emotion colors for visualization
15
  EMOTION_COLORS = {
16
- 'anger': '#E74C3C', # Red
17
  'joy': '#F1C40F', # Yellow
18
- 'love': '#E91E63', # Pink
19
  'sadness': '#3498DB', # Blue
 
20
  'fear': '#7D3C98', # Purple
21
- 'surprise': '#2ECC71' # Green
 
 
22
  }
23
 
24
- # Load model and tokenizer
25
- print("Loading model and tokenizer...")
26
- model_name = "bhadresh-savani/bert-base-uncased-emotion"
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
28
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # Get emotion labels
31
- id2label = model.config.id2label
32
- emotions_list = list(id2label.values())
33
- print(f"Model loaded. Emotions: {emotions_list}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def analyze_emotion(text):
36
- """Analyze emotion in text and return visualization and results"""
37
  if not text or not text.strip():
38
  return None, {"error": "Please enter some text to analyze"}
39
 
40
  try:
41
- # Tokenize and get prediction
42
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
 
 
 
 
 
 
43
 
44
- with torch.no_grad():
45
- outputs = model(**inputs)
46
- scores = torch.nn.functional.softmax(outputs.logits, dim=1).squeeze().numpy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Get emotion labels and their scores
49
- emotion_scores = [(id2label[i], float(scores[i])) for i in range(len(scores))]
 
 
50
 
51
- # Sort by scores
52
- emotion_scores.sort(key=lambda x: x[1], reverse=True)
53
- emotions, scores = zip(*emotion_scores)
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Create visualization
56
  fig = create_visualization(emotions, scores, text)
@@ -62,10 +292,16 @@ def analyze_emotion(text):
62
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
63
  }
64
 
 
 
 
 
65
  return fig, output
66
 
67
  except Exception as e:
68
- print(f"Error in analyze_emotion: {str(e)}")
 
 
69
  return None, {"error": f"Analysis failed: {str(e)}"}
70
 
71
  def create_visualization(emotions, scores, text=None):
@@ -92,28 +328,43 @@ def create_visualization(emotions, scores, text=None):
92
  display_text = text if len(text) < 50 else text[:47] + "..."
93
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
94
  else:
95
- ax.set_title('Emotion Analysis', pad=20)
96
 
97
  plt.tight_layout()
98
  return fig
99
 
100
  # Create Gradio interface
101
  demo = gr.Interface(
102
- fn=analyze_emotion,
103
- inputs=gr.Textbox(lines=4, placeholder="Enter text to analyze emotions...", label="Input Text"),
104
- outputs=[gr.Plot(label="Emotion Distribution"), gr.JSON(label="Analysis Results")],
105
- title="🧠 Emotion Analysis App",
106
- description="This app analyzes emotions in text using BERT. It can detect six emotions: anger, joy, love, sadness, fear, and surprise.",
 
 
 
 
 
 
 
 
 
107
  examples=[
108
- ["I'm so excited about this new opportunity!"],
109
- ["The news made me very sad and disappointed."],
110
- ["I can't believe what just happened! This is totally unexpected!"],
111
- ["I'm really angry about how they treated me."],
112
- ["I love spending time with my family and friends."],
113
- ["I'm terrified of what might happen next."]
114
- ]
 
 
 
 
115
  )
116
 
117
  # Launch the app
118
  if __name__ == "__main__":
119
- demo.launch()
 
 
 
1
import subprocess
import sys
import os

# Best-effort dependency bootstrap: when the marker paths for Docker
# (/.dockerenv) and Kaggle (/kaggle) are absent, try importing the
# required packages and pip-install them on failure.
# NOTE(review): the original comment said "not Colab/Jupyter" but the
# check actually tests for Docker/Kaggle marker paths — confirm intent.
if not os.path.exists("/.dockerenv") and not os.path.exists("/kaggle"):
    try:
        import transformers
        import torch
        import matplotlib
        import gradio
    except ImportError:
        print("Installing required packages...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install",
             "transformers", "torch", "matplotlib", "gradio"]
        )
17
 
 
 
 
18
  import torch
19
+ import numpy as np
20
+ import matplotlib.pyplot as plt
21
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
22
+ import gradio as gr
23
+ import re
24
+
25
print("Setting up custom emotion analysis model...")

# Keyword lexicon per emotion category. These exact strings are substituted
# into the [MASK] prompt templates, so spelling matters; each category
# carries 20 descriptors (the analysis uses the first 10 of each per query).
EMOTION_CATEGORIES = {
    'joy': [
        'happy', 'joyful', 'delighted', 'pleased', 'excited', 'thrilled', 'cheerful',
        'content', 'glad', 'elated', 'ecstatic', 'jubilant', 'blissful', 'overjoyed',
        'satisfied', 'euphoric', 'merry', 'radiant', 'gleeful', 'lighthearted',
    ],
    'sadness': [
        'sad', 'unhappy', 'depressed', 'gloomy', 'miserable', 'disappointed', 'sorrowful',
        'heartbroken', 'downcast', 'melancholy', 'despondent', 'disheartened', 'grief-stricken',
        'somber', 'mournful', 'forlorn', 'dejected', 'crestfallen', 'woeful', 'desolate',
    ],
    'anger': [
        'angry', 'furious', 'enraged', 'irritated', 'annoyed', 'resentful', 'irate',
        'outraged', 'hostile', 'mad', 'incensed', 'livid', 'infuriated', 'seething',
        'indignant', 'exasperated', 'bitter', 'vexed', 'aggravated', 'fuming',
    ],
    'fear': [
        'afraid', 'scared', 'frightened', 'terrified', 'anxious', 'worried', 'nervous',
        'panicked', 'horrified', 'dreadful', 'apprehensive', 'petrified', 'paranoid',
        'alarmed', 'uneasy', 'tense', 'distressed', 'intimidated', 'threatened', 'fearful',
    ],
    'surprise': [
        'surprised', 'amazed', 'astonished', 'shocked', 'stunned', 'startled', 'astounded',
        'bewildered', 'dumbfounded', 'unexpected', 'awestruck', 'flabbergasted', 'speechless',
        'taken aback', 'thunderstruck', 'incredulous', 'staggered', 'perplexed', 'agape', 'overwhelmed',
    ],
    'love': [
        'loving', 'affectionate', 'fond', 'adoring', 'caring', 'devoted', 'passionate',
        'tender', 'warm', 'compassionate', 'enamored', 'cherishing', 'smitten',
        'infatuated', 'admiring', 'doting', 'treasuring', 'nurturing', 'endearing', 'ardent',
    ],
    'sarcasm': [
        'sarcastic', 'ironic', 'mocking', 'cynical', 'satirical', 'sardonic', 'facetious',
        'contemptuous', 'sneering', 'scornful', 'caustic', 'biting', 'acerbic', 'cutting',
        'derisive', 'dry', 'wry', 'tongue-in-cheek', 'insincere', 'patronizing',
    ],
}
65
 
66
  # Define emotion colors for visualization
67
  EMOTION_COLORS = {
 
68
  'joy': '#F1C40F', # Yellow
 
69
  'sadness': '#3498DB', # Blue
70
+ 'anger': '#E74C3C', # Red
71
  'fear': '#7D3C98', # Purple
72
+ 'surprise': '#2ECC71', # Green
73
+ 'love': '#E91E63', # Pink
74
+ 'sarcasm': '#FF7F50' # Coral
75
  }
76
 
77
# Load the plain bert-base-uncased checkpoint. Emotions are scored via
# masked-token prompting, hence AutoModelForMaskedLM rather than a
# fine-tuned sequence-classification head.
print("Loading BERT model and tokenizer (this may take a moment)...")
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Run on GPU when one is visible; input tensors are moved to the same
# device before every forward pass.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded successfully. Using device: {device}")
87
+
88
# Regexes (matched against lowercased text) that flag common sarcastic
# constructions.
SARCASM_PATTERNS = [
    r'\b(?:yeah|sure|right|wow|oh)(?:\s+right|\s+sure|\s+ok|\s+okay)?\s*$',   # "Yeah right", "Sure ok"
    r'\bso\s+(?:happy|excited|thrilled|glad|impressed)\b',                    # "so happy" (context dependent)
    r'(?:^|\s)(?:thanks|thank you) for (?:nothing|that|pointing|stating)\b',  # "Thanks for nothing"
    r'\b(?:just|exactly|precisely) what (?:I|we) need',                       # "Just what I need"
    r'\b(?:brilliant|genius|smart|clever|impressive)\b',                      # context dependent
    r'(?:\!|\?)\s*(?:\!|\?)+',                                                # repeated !/? punctuation
    r'\bcongratulations\b',                                                   # context dependent
    r'(?:^|\s)(?:oh|ah)\s+(?:really|wow|amazing|wonderful)\b',                # "Oh really", "Ah wonderful"
]

def detect_sarcasm_patterns(text):
    """Score sarcasm cues in *text* on a 0.0–1.0 scale.

    Counts how many SARCASM_PATTERNS match the lowercased text; three
    or more matches saturate the score at 1.0.
    """
    lowered = text.lower()
    hits = sum(1 for pattern in SARCASM_PATTERNS if re.search(pattern, lowered))
    return min(hits / 3, 1.0)
115
+
116
def create_emotion_template(emotion_word):
    """Build the masked prompt used to probe BERT for one emotion keyword."""
    return "The text expresses [MASK] emotions. It feels " + emotion_word + "."
119
+
120
def create_sarcasm_template():
    """Return the fixed masked prompt used to probe BERT for sarcasm."""
    return "This text is [MASK] sarcastic."
123
+
124
def predict_masked_token(text, template):
    """Return BERT's probability distribution over the vocabulary for the
    [MASK] slot(s) in *template*.

    The template is appended to *text* and the combined string is passed
    through the masked LM. Returns a tensor of shape
    (num_mask_positions, vocab_size) holding softmax probabilities.
    """
    full_text = text + " " + template
    inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512)

    # Bug fix: for inputs near/over the 512-token budget, truncation could
    # cut off the appended template together with its [MASK], leaving no
    # mask position and an empty logits slice downstream. If that happened,
    # clip the *text* portion instead so the template always survives.
    if not (inputs["input_ids"] == tokenizer.mask_token_id).any():
        template_len = len(tokenizer(template)["input_ids"])
        budget = max(1, 512 - template_len)
        text_ids = tokenizer(text, truncation=True, max_length=budget)["input_ids"]
        clipped_text = tokenizer.decode(text_ids, skip_special_tokens=True)
        inputs = tokenizer(clipped_text + " " + template, return_tensors="pt",
                           truncation=True, max_length=512)

    # Move every input tensor to the model's device (GPU when available).
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Column indices of the mask token within the single batch row.
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits at the mask position(s), normalized over the vocabulary.
    mask_token_logits = outputs.logits[0, mask_token_index, :]
    return torch.nn.functional.softmax(mask_token_logits, dim=-1)
148
 
149
def get_emotion_score(probs, positive_tokens, negative_tokens=None):
    """Score an emotion from mask-token probabilities.

    Sums the probability mass the model assigns to *positive_tokens* at the
    mask slot and, when *negative_tokens* is given, subtracts the mass
    assigned to those tokens.
    """
    def _mass(words):
        # Total probability over the vocabulary ids of *words*.
        ids = [tokenizer.convert_tokens_to_ids(w) for w in words]
        return sum(probs[0, tid].item() for tid in ids)

    score = _mass(positive_tokens)
    if negative_tokens:
        score -= _mass(negative_tokens)
    return score
164
+
165
def get_sarcasm_score(text, probs):
    """Blend three signals into a sarcasm score clamped to [0, 1].

    *probs* is the mask distribution for the "This text is [MASK] sarcastic."
    template. Two extra probes re-query the model via predict_masked_token.
    """
    def _mass(dist, words):
        # Probability mass *dist* assigns to *words* at the mask slot.
        ids = [tokenizer.convert_tokens_to_ids(w) for w in words]
        return sum(dist[0, tid].item() for tid in ids)

    # 1) Model-based signal: intensifiers minus hedges filling the mask.
    model_score = (
        _mass(probs, ['definitely', 'very', 'extremely', 'clearly', 'obviously'])
        - _mass(probs, ['not', 'barely', 'hardly', 'slightly', 'somewhat'])
    )

    # 2) Regex heuristics over the raw text.
    pattern_score = detect_sarcasm_patterns(text)

    # 3) Contradiction probes.
    contradiction_score = 0

    # 3a) Tone probe: strong mass on BOTH polarities suggests irony.
    tone_probs = predict_masked_token(text, "This text has a [MASK] tone.")
    positive_sentiment = _mass(tone_probs, ['positive', 'happy', 'good', 'great'])
    negative_sentiment = _mass(tone_probs, ['negative', 'sad', 'bad', 'terrible'])
    contradiction_score += min(positive_sentiment, negative_sentiment) * 2

    # 3b) Intent probe: boost when sarcastic intent outweighs serious intent.
    intent_probs = predict_masked_token(text, "The writer's intent is [MASK].")
    sarcastic_intent = _mass(intent_probs, ['sarcastic', 'ironic', 'joking', 'mocking'])
    serious_intent = _mass(intent_probs, ['serious', 'sincere', 'honest', 'earnest'])
    if sarcastic_intent > serious_intent:
        contradiction_score += sarcastic_intent - serious_intent

    # Weighted blend of the three signals, clamped to [0, 1].
    combined_sarcasm_score = 0.4 * model_score + 0.3 * pattern_score + 0.3 * contradiction_score
    return max(0, min(combined_sarcasm_score, 1))
226
 
227
+ def analyze_emotions(text):
228
+ """Analyze emotions in text using custom BERT-based approach with sarcasm detection"""
229
  if not text or not text.strip():
230
  return None, {"error": "Please enter some text to analyze"}
231
 
232
  try:
233
+ # Templates for emotion detection
234
+ emotion_scores = {}
235
+
236
+ # Positive emotion indicator tokens
237
+ positive_indicators = ['positive', 'strong', 'clear', 'definite', 'evident', 'genuine']
238
+
239
+ # Negative indicators for contrasting emotions
240
+ negative_indicators = ['negative', 'weak', 'unclear', 'slight', 'fake', 'absent']
241
 
242
+ # For each emotion category
243
+ for emotion, keywords in EMOTION_CATEGORIES.items():
244
+ if emotion == 'sarcasm':
245
+ # Special handling for sarcasm
246
+ template = create_sarcasm_template()
247
+ probs = predict_masked_token(text, template)
248
+ emotion_scores[emotion] = get_sarcasm_score(text, probs)
249
+ continue
250
+
251
+ # Calculate score for each keyword and take average
252
+ keyword_scores = []
253
+
254
+ # Use a subset of keywords to improve efficiency
255
+ selected_keywords = keywords[:10] # Use first 10 keywords
256
+
257
+ for keyword in selected_keywords:
258
+ template = create_emotion_template(keyword)
259
+ probs = predict_masked_token(text, template)
260
+ score = get_emotion_score(probs, positive_indicators, negative_indicators)
261
+ keyword_scores.append(score)
262
+
263
+ # Take average score across all keywords for this emotion
264
+ emotion_scores[emotion] = sum(keyword_scores) / len(keyword_scores)
265
 
266
+ # Normalize scores to ensure they sum to 1
267
+ min_score = min(emotion_scores.values())
268
+ max_score = max(emotion_scores.values())
269
+ score_range = max_score - min_score
270
 
271
+ if score_range > 0:
272
+ # Normal case - we have a range of scores
273
+ normalized_scores = {e: (s - min_score) / score_range for e, s in emotion_scores.items()}
274
+ # Further normalize to sum to 1
275
+ total = sum(normalized_scores.values())
276
+ normalized_scores = {e: s / total for e, s in normalized_scores.items()}
277
+ else:
278
+ # Edge case - all emotions scored the same
279
+ normalized_scores = {e: 1/len(emotion_scores) for e in emotion_scores}
280
+
281
+ # Sort emotions by score
282
+ sorted_emotions = sorted(normalized_scores.items(), key=lambda x: x[1], reverse=True)
283
+ emotions, scores = zip(*sorted_emotions)
284
 
285
  # Create visualization
286
  fig = create_visualization(emotions, scores, text)
 
292
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
293
  }
294
 
295
+ # Add sarcasm note if detected with high confidence
296
+ if 'sarcasm' in normalized_scores and normalized_scores['sarcasm'] > 0.2:
297
+ output["note"] = f"Sarcasm detected with {normalized_scores['sarcasm']*100:.1f}% confidence"
298
+
299
  return fig, output
300
 
301
  except Exception as e:
302
+ import traceback
303
+ print(f"Error in analyze_emotions: {str(e)}")
304
+ print(traceback.format_exc())
305
  return None, {"error": f"Analysis failed: {str(e)}"}
306
 
307
  def create_visualization(emotions, scores, text=None):
 
328
  display_text = text if len(text) < 50 else text[:47] + "..."
329
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
330
  else:
331
+ ax.set_title('Custom Emotion Analysis', pad=20)
332
 
333
  plt.tight_layout()
334
  return fig
335
 
336
# Create Gradio interface
demo = gr.Interface(
    fn=analyze_emotions,
    inputs=gr.Textbox(
        lines=4,
        placeholder="Enter text to analyze emotions...",
        label="Input Text",
    ),
    outputs=[
        gr.Plot(label="Emotion Distribution"),
        gr.JSON(label="Analysis Results"),
    ],
    title="🧠 Enhanced Emotion Analysis with Sarcasm Detection",
    description="""This app analyzes emotions in text using a custom BERT-based approach.
    It examines how well the input text aligns with seven emotional categories: joy, sadness, anger, fear, surprise, love, and sarcasm.
    The analysis uses BERT's contextual understanding along with linguistic pattern recognition to evaluate emotional content.""",
    examples=[
        ["I can't wait for the concert tonight! It's going to be amazing!"],
        ["The news about the layoffs has left everyone feeling devastated."],
        ["I'm absolutely furious about how they handled this situation."],
        ["I'm really nervous about the upcoming presentation."],
        ["Wow! I didn't expect that plot twist at all!"],
        ["I deeply cherish the time we spend together."],
        ["Oh great, another meeting that could have been an email. Just what I needed today."],
        ["Sure, I'd LOVE to do your work for you. Nothing better than doing two jobs for one salary!"],
        ["What a FANTASTIC way to start the day - my car won't start and it's pouring rain!"],
    ],
    # NOTE(review): allow_flagging was deprecated/removed in newer Gradio
    # (renamed flagging_mode in v5); the bootstrap installs an unpinned
    # gradio — confirm the installed version still accepts this kwarg.
    allow_flagging="never"
)
365
 
366
# Launch the app
if __name__ == "__main__":
    print("Starting Gradio app...")
    # Launch with defaults that behave well on Hugging Face Spaces.
    demo.launch(debug=False)