Natwar committed on
Commit
0bc2de3
·
verified ·
1 Parent(s): f44ef6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -45
app.py CHANGED
@@ -1,56 +1,286 @@
1
  import subprocess
2
  import sys
 
3
 
4
- # Install required packages
5
- subprocess.check_call([sys.executable, "-m", "pip", "install",
6
- "gradio==3.50.2", "matplotlib", "numpy", "torch", "transformers"])
 
 
 
 
 
 
 
 
 
7
 
8
- import gradio as gr
9
- import matplotlib.pyplot as plt
10
- import numpy as np
11
  import torch
12
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # Define emotion colors for visualization
15
  EMOTION_COLORS = {
16
- 'anger': '#E74C3C', # Red
17
  'joy': '#F1C40F', # Yellow
18
- 'love': '#E91E63', # Pink
19
  'sadness': '#3498DB', # Blue
 
20
  'fear': '#7D3C98', # Purple
21
- 'surprise': '#2ECC71' # Green
 
 
22
  }
23
 
24
- # Load model and tokenizer
25
- print("Loading model and tokenizer...")
26
- model_name = "bhadresh-savani/bert-base-uncased-emotion"
27
  tokenizer = AutoTokenizer.from_pretrained(model_name)
28
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # Get emotion labels
31
- id2label = model.config.id2label
32
- emotions_list = list(id2label.values())
33
- print(f"Model loaded. Emotions: {emotions_list}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def analyze_emotion(text):
36
- """Analyze emotion in text and return visualization and results"""
37
  if not text or not text.strip():
38
  return None, {"error": "Please enter some text to analyze"}
39
 
40
  try:
41
- # Tokenize and get prediction
42
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
 
 
 
 
 
 
43
 
44
- with torch.no_grad():
45
- outputs = model(**inputs)
46
- scores = torch.nn.functional.softmax(outputs.logits, dim=1).squeeze().numpy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Get emotion labels and their scores
49
- emotion_scores = [(id2label[i], float(scores[i])) for i in range(len(scores))]
 
 
50
 
51
- # Sort by scores
52
- emotion_scores.sort(key=lambda x: x[1], reverse=True)
53
- emotions, scores = zip(*emotion_scores)
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Create visualization
56
  fig = create_visualization(emotions, scores, text)
@@ -62,10 +292,16 @@ def analyze_emotion(text):
62
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
63
  }
64
 
 
 
 
 
65
  return fig, output
66
 
67
  except Exception as e:
68
- print(f"Error in analyze_emotion: {str(e)}")
 
 
69
  return None, {"error": f"Analysis failed: {str(e)}"}
70
 
71
  def create_visualization(emotions, scores, text=None):
@@ -92,28 +328,43 @@ def create_visualization(emotions, scores, text=None):
92
  display_text = text if len(text) < 50 else text[:47] + "..."
93
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
94
  else:
95
- ax.set_title('Emotion Analysis', pad=20)
96
 
97
  plt.tight_layout()
98
  return fig
99
 
100
  # Create Gradio interface
101
  demo = gr.Interface(
102
- fn=analyze_emotion,
103
- inputs=gr.Textbox(lines=4, placeholder="Enter text to analyze emotions...", label="Input Text"),
104
- outputs=[gr.Plot(label="Emotion Distribution"), gr.JSON(label="Analysis Results")],
105
- title="🧠 Emotion Analysis App",
106
- description="This app analyzes emotions in text using BERT. It can detect six emotions: anger, joy, love, sadness, fear, and surprise.",
 
 
 
 
 
 
 
 
 
107
  examples=[
108
- ["I'm so excited about this new opportunity!"],
109
- ["The news made me very sad and disappointed."],
110
- ["I can't believe what just happened! This is totally unexpected!"],
111
- ["I'm really angry about how they treated me."],
112
- ["I love spending time with my family and friends."],
113
- ["I'm terrified of what might happen next."]
114
- ]
 
 
 
 
115
  )
116
 
117
  # Launch the app
118
  if __name__ == "__main__":
119
- demo.launch()
 
 
 
1
import subprocess
import sys
import os

# Best-effort dependency bootstrap: when the marker paths for Docker
# (/.dockerenv) and Kaggle (/kaggle) are absent, try importing the
# required packages and pip-install them on failure.
# NOTE(review): the original comment said "not Colab/Jupyter" but the
# check actually tests for Docker/Kaggle marker paths — confirm intent.
if not os.path.exists("/.dockerenv") and not os.path.exists("/kaggle"):
    try:
        import transformers
        import torch
        import matplotlib
        import gradio
    except ImportError:
        print("Installing required packages...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install",
             "transformers", "torch", "matplotlib", "gradio"]
        )
17
 
 
 
 
18
  import torch
19
+ import numpy as np
20
+ import matplotlib.pyplot as plt
21
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
22
+ import gradio as gr
23
+ import re
24
+
25
print("Setting up custom emotion analysis model...")

# Keyword lexicon per emotion category. These exact strings are substituted
# into the [MASK] prompt templates, so spelling matters; each category
# carries 20 descriptors (the analysis uses the first 10 of each per query).
EMOTION_CATEGORIES = {
    'joy': [
        'happy', 'joyful', 'delighted', 'pleased', 'excited', 'thrilled', 'cheerful',
        'content', 'glad', 'elated', 'ecstatic', 'jubilant', 'blissful', 'overjoyed',
        'satisfied', 'euphoric', 'merry', 'radiant', 'gleeful', 'lighthearted',
    ],
    'sadness': [
        'sad', 'unhappy', 'depressed', 'gloomy', 'miserable', 'disappointed', 'sorrowful',
        'heartbroken', 'downcast', 'melancholy', 'despondent', 'disheartened', 'grief-stricken',
        'somber', 'mournful', 'forlorn', 'dejected', 'crestfallen', 'woeful', 'desolate',
    ],
    'anger': [
        'angry', 'furious', 'enraged', 'irritated', 'annoyed', 'resentful', 'irate',
        'outraged', 'hostile', 'mad', 'incensed', 'livid', 'infuriated', 'seething',
        'indignant', 'exasperated', 'bitter', 'vexed', 'aggravated', 'fuming',
    ],
    'fear': [
        'afraid', 'scared', 'frightened', 'terrified', 'anxious', 'worried', 'nervous',
        'panicked', 'horrified', 'dreadful', 'apprehensive', 'petrified', 'paranoid',
        'alarmed', 'uneasy', 'tense', 'distressed', 'intimidated', 'threatened', 'fearful',
    ],
    'surprise': [
        'surprised', 'amazed', 'astonished', 'shocked', 'stunned', 'startled', 'astounded',
        'bewildered', 'dumbfounded', 'unexpected', 'awestruck', 'flabbergasted', 'speechless',
        'taken aback', 'thunderstruck', 'incredulous', 'staggered', 'perplexed', 'agape', 'overwhelmed',
    ],
    'love': [
        'loving', 'affectionate', 'fond', 'adoring', 'caring', 'devoted', 'passionate',
        'tender', 'warm', 'compassionate', 'enamored', 'cherishing', 'smitten',
        'infatuated', 'admiring', 'doting', 'treasuring', 'nurturing', 'endearing', 'ardent',
    ],
    'sarcasm': [
        'sarcastic', 'ironic', 'mocking', 'cynical', 'satirical', 'sardonic', 'facetious',
        'contemptuous', 'sneering', 'scornful', 'caustic', 'biting', 'acerbic', 'cutting',
        'derisive', 'dry', 'wry', 'tongue-in-cheek', 'insincere', 'patronizing',
    ],
}
65
 
66
  # Define emotion colors for visualization
67
  EMOTION_COLORS = {
 
68
  'joy': '#F1C40F', # Yellow
 
69
  'sadness': '#3498DB', # Blue
70
+ 'anger': '#E74C3C', # Red
71
  'fear': '#7D3C98', # Purple
72
+ 'surprise': '#2ECC71', # Green
73
+ 'love': '#E91E63', # Pink
74
+ 'sarcasm': '#FF7F50' # Coral
75
  }
76
 
77
# Load the plain bert-base-uncased checkpoint. Emotions are scored via
# masked-token prompting, hence AutoModelForMaskedLM rather than a
# fine-tuned sequence-classification head.
print("Loading BERT model and tokenizer (this may take a moment)...")
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Run on GPU when one is visible; input tensors are moved to the same
# device before every forward pass.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model loaded successfully. Using device: {device}")
87
+
88
# Regexes (matched against lowercased text) that flag common sarcastic
# constructions.
SARCASM_PATTERNS = [
    r'\b(?:yeah|sure|right|wow|oh)(?:\s+right|\s+sure|\s+ok|\s+okay)?\s*$',   # "Yeah right", "Sure ok"
    r'\bso\s+(?:happy|excited|thrilled|glad|impressed)\b',                    # "so happy" (context dependent)
    r'(?:^|\s)(?:thanks|thank you) for (?:nothing|that|pointing|stating)\b',  # "Thanks for nothing"
    r'\b(?:just|exactly|precisely) what (?:I|we) need',                       # "Just what I need"
    r'\b(?:brilliant|genius|smart|clever|impressive)\b',                      # context dependent
    r'(?:\!|\?)\s*(?:\!|\?)+',                                                # repeated !/? punctuation
    r'\bcongratulations\b',                                                   # context dependent
    r'(?:^|\s)(?:oh|ah)\s+(?:really|wow|amazing|wonderful)\b',                # "Oh really", "Ah wonderful"
]

def detect_sarcasm_patterns(text):
    """Score sarcasm cues in *text* on a 0.0–1.0 scale.

    Counts how many SARCASM_PATTERNS match the lowercased text; three
    or more matches saturate the score at 1.0.
    """
    lowered = text.lower()
    hits = sum(1 for pattern in SARCASM_PATTERNS if re.search(pattern, lowered))
    return min(hits / 3, 1.0)
115
+
116
def create_emotion_template(emotion_word):
    """Build the masked prompt used to probe BERT for one emotion keyword."""
    return "The text expresses [MASK] emotions. It feels " + emotion_word + "."
119
+
120
def create_sarcasm_template():
    """Return the fixed masked prompt used to probe BERT for sarcasm."""
    return "This text is [MASK] sarcastic."
123
+
124
def predict_masked_token(text, template):
    """Return BERT's probability distribution over the vocabulary for the
    [MASK] slot(s) in *template*.

    The template is appended to *text* and the combined string is passed
    through the masked LM. Returns a tensor of shape
    (num_mask_positions, vocab_size) holding softmax probabilities.
    """
    full_text = text + " " + template
    inputs = tokenizer(full_text, return_tensors="pt", truncation=True, max_length=512)

    # Bug fix: for inputs near/over the 512-token budget, truncation could
    # cut off the appended template together with its [MASK], leaving no
    # mask position and an empty logits slice downstream. If that happened,
    # clip the *text* portion instead so the template always survives.
    if not (inputs["input_ids"] == tokenizer.mask_token_id).any():
        template_len = len(tokenizer(template)["input_ids"])
        budget = max(1, 512 - template_len)
        text_ids = tokenizer(text, truncation=True, max_length=budget)["input_ids"]
        clipped_text = tokenizer.decode(text_ids, skip_special_tokens=True)
        inputs = tokenizer(clipped_text + " " + template, return_tensors="pt",
                           truncation=True, max_length=512)

    # Move every input tensor to the model's device (GPU when available).
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Column indices of the mask token within the single batch row.
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits at the mask position(s), normalized over the vocabulary.
    mask_token_logits = outputs.logits[0, mask_token_index, :]
    return torch.nn.functional.softmax(mask_token_logits, dim=-1)
148
 
149
def get_emotion_score(probs, positive_tokens, negative_tokens=None):
    """Score an emotion from mask-token probabilities.

    Sums the probability mass the model assigns to *positive_tokens* at the
    mask slot and, when *negative_tokens* is given, subtracts the mass
    assigned to those tokens.
    """
    def _mass(words):
        # Total probability over the vocabulary ids of *words*.
        ids = [tokenizer.convert_tokens_to_ids(w) for w in words]
        return sum(probs[0, tid].item() for tid in ids)

    score = _mass(positive_tokens)
    if negative_tokens:
        score -= _mass(negative_tokens)
    return score
164
+
165
def get_sarcasm_score(text, probs):
    """Blend three signals into a sarcasm score clamped to [0, 1].

    *probs* is the mask distribution for the "This text is [MASK] sarcastic."
    template. Two extra probes re-query the model via predict_masked_token.
    """
    def _mass(dist, words):
        # Probability mass *dist* assigns to *words* at the mask slot.
        ids = [tokenizer.convert_tokens_to_ids(w) for w in words]
        return sum(dist[0, tid].item() for tid in ids)

    # 1) Model-based signal: intensifiers minus hedges filling the mask.
    model_score = (
        _mass(probs, ['definitely', 'very', 'extremely', 'clearly', 'obviously'])
        - _mass(probs, ['not', 'barely', 'hardly', 'slightly', 'somewhat'])
    )

    # 2) Regex heuristics over the raw text.
    pattern_score = detect_sarcasm_patterns(text)

    # 3) Contradiction probes.
    contradiction_score = 0

    # 3a) Tone probe: strong mass on BOTH polarities suggests irony.
    tone_probs = predict_masked_token(text, "This text has a [MASK] tone.")
    positive_sentiment = _mass(tone_probs, ['positive', 'happy', 'good', 'great'])
    negative_sentiment = _mass(tone_probs, ['negative', 'sad', 'bad', 'terrible'])
    contradiction_score += min(positive_sentiment, negative_sentiment) * 2

    # 3b) Intent probe: boost when sarcastic intent outweighs serious intent.
    intent_probs = predict_masked_token(text, "The writer's intent is [MASK].")
    sarcastic_intent = _mass(intent_probs, ['sarcastic', 'ironic', 'joking', 'mocking'])
    serious_intent = _mass(intent_probs, ['serious', 'sincere', 'honest', 'earnest'])
    if sarcastic_intent > serious_intent:
        contradiction_score += sarcastic_intent - serious_intent

    # Weighted blend of the three signals, clamped to [0, 1].
    combined_sarcasm_score = 0.4 * model_score + 0.3 * pattern_score + 0.3 * contradiction_score
    return max(0, min(combined_sarcasm_score, 1))
226
 
227
+ def analyze_emotions(text):
228
+ """Analyze emotions in text using custom BERT-based approach with sarcasm detection"""
229
  if not text or not text.strip():
230
  return None, {"error": "Please enter some text to analyze"}
231
 
232
  try:
233
+ # Templates for emotion detection
234
+ emotion_scores = {}
235
+
236
+ # Positive emotion indicator tokens
237
+ positive_indicators = ['positive', 'strong', 'clear', 'definite', 'evident', 'genuine']
238
+
239
+ # Negative indicators for contrasting emotions
240
+ negative_indicators = ['negative', 'weak', 'unclear', 'slight', 'fake', 'absent']
241
 
242
+ # For each emotion category
243
+ for emotion, keywords in EMOTION_CATEGORIES.items():
244
+ if emotion == 'sarcasm':
245
+ # Special handling for sarcasm
246
+ template = create_sarcasm_template()
247
+ probs = predict_masked_token(text, template)
248
+ emotion_scores[emotion] = get_sarcasm_score(text, probs)
249
+ continue
250
+
251
+ # Calculate score for each keyword and take average
252
+ keyword_scores = []
253
+
254
+ # Use a subset of keywords to improve efficiency
255
+ selected_keywords = keywords[:10] # Use first 10 keywords
256
+
257
+ for keyword in selected_keywords:
258
+ template = create_emotion_template(keyword)
259
+ probs = predict_masked_token(text, template)
260
+ score = get_emotion_score(probs, positive_indicators, negative_indicators)
261
+ keyword_scores.append(score)
262
+
263
+ # Take average score across all keywords for this emotion
264
+ emotion_scores[emotion] = sum(keyword_scores) / len(keyword_scores)
265
 
266
+ # Normalize scores to ensure they sum to 1
267
+ min_score = min(emotion_scores.values())
268
+ max_score = max(emotion_scores.values())
269
+ score_range = max_score - min_score
270
 
271
+ if score_range > 0:
272
+ # Normal case - we have a range of scores
273
+ normalized_scores = {e: (s - min_score) / score_range for e, s in emotion_scores.items()}
274
+ # Further normalize to sum to 1
275
+ total = sum(normalized_scores.values())
276
+ normalized_scores = {e: s / total for e, s in normalized_scores.items()}
277
+ else:
278
+ # Edge case - all emotions scored the same
279
+ normalized_scores = {e: 1/len(emotion_scores) for e in emotion_scores}
280
+
281
+ # Sort emotions by score
282
+ sorted_emotions = sorted(normalized_scores.items(), key=lambda x: x[1], reverse=True)
283
+ emotions, scores = zip(*sorted_emotions)
284
 
285
  # Create visualization
286
  fig = create_visualization(emotions, scores, text)
 
292
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
293
  }
294
 
295
+ # Add sarcasm note if detected with high confidence
296
+ if 'sarcasm' in normalized_scores and normalized_scores['sarcasm'] > 0.2:
297
+ output["note"] = f"Sarcasm detected with {normalized_scores['sarcasm']*100:.1f}% confidence"
298
+
299
  return fig, output
300
 
301
  except Exception as e:
302
+ import traceback
303
+ print(f"Error in analyze_emotions: {str(e)}")
304
+ print(traceback.format_exc())
305
  return None, {"error": f"Analysis failed: {str(e)}"}
306
 
307
  def create_visualization(emotions, scores, text=None):
 
328
  display_text = text if len(text) < 50 else text[:47] + "..."
329
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
330
  else:
331
+ ax.set_title('Custom Emotion Analysis', pad=20)
332
 
333
  plt.tight_layout()
334
  return fig
335
 
336
# Create Gradio interface
demo = gr.Interface(
    fn=analyze_emotions,
    inputs=gr.Textbox(
        lines=4,
        placeholder="Enter text to analyze emotions...",
        label="Input Text",
    ),
    outputs=[
        gr.Plot(label="Emotion Distribution"),
        gr.JSON(label="Analysis Results"),
    ],
    title="🧠 Enhanced Emotion Analysis with Sarcasm Detection",
    description="""This app analyzes emotions in text using a custom BERT-based approach.
    It examines how well the input text aligns with seven emotional categories: joy, sadness, anger, fear, surprise, love, and sarcasm.
    The analysis uses BERT's contextual understanding along with linguistic pattern recognition to evaluate emotional content.""",
    examples=[
        ["I can't wait for the concert tonight! It's going to be amazing!"],
        ["The news about the layoffs has left everyone feeling devastated."],
        ["I'm absolutely furious about how they handled this situation."],
        ["I'm really nervous about the upcoming presentation."],
        ["Wow! I didn't expect that plot twist at all!"],
        ["I deeply cherish the time we spend together."],
        ["Oh great, another meeting that could have been an email. Just what I needed today."],
        ["Sure, I'd LOVE to do your work for you. Nothing better than doing two jobs for one salary!"],
        ["What a FANTASTIC way to start the day - my car won't start and it's pouring rain!"],
    ],
    # NOTE(review): allow_flagging was deprecated/removed in newer Gradio
    # (renamed flagging_mode in v5); the bootstrap installs an unpinned
    # gradio — confirm the installed version still accepts this kwarg.
    allow_flagging="never"
)
365
 
366
# Launch the app
if __name__ == "__main__":
    print("Starting Gradio app...")
    # Launch with defaults that behave well on Hugging Face Spaces.
    demo.launch(debug=False)