Natwar commited on
Commit
c3f5238
·
verified ·
1 Parent(s): 0bc2de3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +270 -162
app.py CHANGED
@@ -21,45 +21,39 @@ import matplotlib.pyplot as plt
21
  from transformers import AutoModelForMaskedLM, AutoTokenizer
22
  import gradio as gr
23
  import re
 
24
 
25
- print("Setting up custom emotion analysis model...")
26
 
27
- # Enhanced emotion categories with more keywords
28
  EMOTION_CATEGORIES = {
29
  'joy': [
30
- 'happy', 'joyful', 'delighted', 'pleased', 'excited', 'thrilled', 'cheerful',
31
- 'content', 'glad', 'elated', 'ecstatic', 'jubilant', 'blissful', 'overjoyed',
32
- 'satisfied', 'euphoric', 'merry', 'radiant', 'gleeful', 'lighthearted'
33
  ],
34
  'sadness': [
35
- 'sad', 'unhappy', 'depressed', 'gloomy', 'miserable', 'disappointed', 'sorrowful',
36
- 'heartbroken', 'downcast', 'melancholy', 'despondent', 'disheartened', 'grief-stricken',
37
- 'somber', 'mournful', 'forlorn', 'dejected', 'crestfallen', 'woeful', 'desolate'
38
  ],
39
  'anger': [
40
- 'angry', 'furious', 'enraged', 'irritated', 'annoyed', 'resentful', 'irate',
41
- 'outraged', 'hostile', 'mad', 'incensed', 'livid', 'infuriated', 'seething',
42
- 'indignant', 'exasperated', 'bitter', 'vexed', 'aggravated', 'fuming'
43
  ],
44
  'fear': [
45
- 'afraid', 'scared', 'frightened', 'terrified', 'anxious', 'worried', 'nervous',
46
- 'panicked', 'horrified', 'dreadful', 'apprehensive', 'petrified', 'paranoid',
47
- 'alarmed', 'uneasy', 'tense', 'distressed', 'intimidated', 'threatened', 'fearful'
48
  ],
49
  'surprise': [
50
- 'surprised', 'amazed', 'astonished', 'shocked', 'stunned', 'startled', 'astounded',
51
- 'bewildered', 'dumbfounded', 'unexpected', 'awestruck', 'flabbergasted', 'speechless',
52
- 'taken aback', 'thunderstruck', 'incredulous', 'staggered', 'perplexed', 'agape', 'overwhelmed'
53
  ],
54
  'love': [
55
- 'loving', 'affectionate', 'fond', 'adoring', 'caring', 'devoted', 'passionate',
56
- 'tender', 'warm', 'compassionate', 'enamored', 'cherishing', 'smitten',
57
- 'infatuated', 'admiring', 'doting', 'treasuring', 'nurturing', 'endearing', 'ardent'
58
  ],
59
  'sarcasm': [
60
- 'sarcastic', 'ironic', 'mocking', 'cynical', 'satirical', 'sardonic', 'facetious',
61
- 'contemptuous', 'sneering', 'scornful', 'caustic', 'biting', 'acerbic', 'cutting',
62
- 'derisive', 'dry', 'wry', 'tongue-in-cheek', 'insincere', 'patronizing'
63
  ]
64
  }
65
 
@@ -74,6 +68,10 @@ EMOTION_COLORS = {
74
  'sarcasm': '#FF7F50' # Coral
75
  }
76
 
 
 
 
 
77
  # Load BERT model and tokenizer
78
  print("Loading BERT model and tokenizer (this may take a moment)...")
79
  model_name = "bert-base-uncased"
@@ -85,41 +83,124 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
85
  model = model.to(device)
86
  print(f"Model loaded successfully. Using device: {device}")
87
 
88
- # Sarcasm indicators - linguistic patterns that might indicate sarcasm
89
  SARCASM_PATTERNS = [
90
- r'\b(?:yeah|sure|right|wow|oh)(?:\s+right|\s+sure|\s+ok|\s+okay)?\s*$', # Yeah right, Sure ok
91
- r'\bso\s+(?:happy|excited|thrilled|glad|impressed)\b', # So happy/excited (context dependent)
92
- r'(?:^|\s)(?:thanks|thank you) for (?:nothing|that|pointing|stating)\b', # Thanks for nothing
93
- r'\b(?:just|exactly|precisely) what (?:I|we) need', # Just what I need
94
- r'\b(?:brilliant|genius|smart|clever|impressive)\b', # Brilliant, genius (context dependent)
95
- r'(?:\!|\?)\s*(?:\!|\?)+', # Multiple exclamations/question marks
96
- r'\bcongratulations\b', # Congratulations (context dependent)
97
- r'(?:^|\s)(?:oh|ah)\s+(?:really|wow|amazing|wonderful)\b', # Oh really, Ah wonderful
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ]
99
 
100
- def detect_sarcasm_patterns(text):
101
- """Detect linguistic patterns of sarcasm in text"""
102
- # Convert to lowercase for case-insensitive matching
103
- text_lower = text.lower()
104
 
105
- # Check for each sarcasm pattern
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  matches = 0
 
 
107
  for pattern in SARCASM_PATTERNS:
108
- if re.search(pattern, text_lower):
109
  matches += 1
 
 
 
 
110
 
111
- # Calculate a basic sarcasm score based on matches
112
- sarcasm_pattern_score = min(matches / 3, 1.0) # Cap at 1.0
 
113
 
114
- return sarcasm_pattern_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- def create_emotion_template(emotion_word):
117
- """Create a template sentence for emotion prediction"""
118
- return f"The text expresses [MASK] emotions. It feels {emotion_word}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- def create_sarcasm_template():
121
- """Create a template sentence for sarcasm prediction"""
122
- return "This text is [MASK] sarcastic."
 
 
 
 
 
 
 
 
123
 
124
  def predict_masked_token(text, template):
125
  """Get predictions for a masked token using BERT"""
@@ -146,140 +227,165 @@ def predict_masked_token(text, template):
146
 
147
  return probs
148
 
149
- def get_emotion_score(probs, positive_tokens, negative_tokens=None):
150
- """Calculate emotion score based on token probabilities"""
151
- # Get token IDs for positive and negative words
152
- positive_ids = [tokenizer.convert_tokens_to_ids(word) for word in positive_tokens]
153
-
154
- # Calculate positive score (sum of probabilities of positive tokens)
155
- positive_score = sum(probs[0, token_id].item() for token_id in positive_ids)
156
-
157
- # If negative tokens are provided, subtract their probabilities
158
- negative_score = 0
159
- if negative_tokens:
160
- negative_ids = [tokenizer.convert_tokens_to_ids(word) for word in negative_tokens]
 
 
 
 
 
 
 
 
161
  negative_score = sum(probs[0, token_id].item() for token_id in negative_ids)
 
 
 
 
162
 
163
- return positive_score - negative_score
 
164
 
165
- def get_sarcasm_score(text, probs):
166
- """Calculate sarcasm score based on token probabilities and linguistic patterns"""
 
 
 
 
167
  # Get token IDs for relevant words
168
- positive_sarcasm_ids = [tokenizer.convert_tokens_to_ids(word) for word in
169
- ['definitely', 'very', 'extremely', 'clearly', 'obviously']]
170
- negative_sarcasm_ids = [tokenizer.convert_tokens_to_ids(word) for word in
171
- ['not', 'barely', 'hardly', 'slightly', 'somewhat']]
172
 
173
- # Calculate model-based score
174
- positive_score = sum(probs[0, token_id].item() for token_id in positive_sarcasm_ids)
175
- negative_score = sum(probs[0, token_id].item() for token_id in negative_sarcasm_ids)
176
- model_score = positive_score - negative_score
177
 
178
- # Get pattern-based score
179
  pattern_score = detect_sarcasm_patterns(text)
180
 
181
- # Check for emotion contradiction (e.g., positive words with negative sentiment)
182
- contradiction_score = 0
183
- emotions_detected = {}
184
 
185
- # Simple templates to check for emotional content
186
- emotion_check_templates = {
187
- 'positive': "This text has a [MASK] tone.", # Check for positive/negative/neutral
188
- 'intent': "The writer's intent is [MASK]." # Check for serious/joking/sarcastic
189
- }
190
 
191
- for template_name, template in emotion_check_templates.items():
192
- check_probs = predict_masked_token(text, template)
193
-
194
- if template_name == 'positive':
195
- # Check for contradiction between positive words and negative sentiment
196
- positive_ids = [tokenizer.convert_tokens_to_ids(word) for word in
197
- ['positive', 'happy', 'good', 'great']]
198
- negative_ids = [tokenizer.convert_tokens_to_ids(word) for word in
199
- ['negative', 'sad', 'bad', 'terrible']]
200
-
201
- positive_sentiment = sum(check_probs[0, token_id].item() for token_id in positive_ids)
202
- negative_sentiment = sum(check_probs[0, token_id].item() for token_id in negative_ids)
203
-
204
- # High scores in both positive and negative can indicate sarcasm
205
- contradiction_score += min(positive_sentiment, negative_sentiment) * 2
206
-
207
- elif template_name == 'intent':
208
- # Check if model thinks the intent is sarcastic or joking
209
- sarcastic_ids = [tokenizer.convert_tokens_to_ids(word) for word in
210
- ['sarcastic', 'ironic', 'joking', 'mocking']]
211
- serious_ids = [tokenizer.convert_tokens_to_ids(word) for word in
212
- ['serious', 'sincere', 'honest', 'earnest']]
213
-
214
- sarcastic_intent = sum(check_probs[0, token_id].item() for token_id in sarcastic_ids)
215
- serious_intent = sum(check_probs[0, token_id].item() for token_id in serious_ids)
216
-
217
- # If sarcastic intent is higher than serious intent, boost sarcasm score
218
- if sarcastic_intent > serious_intent:
219
- contradiction_score += (sarcastic_intent - serious_intent)
220
-
221
- # Combine scores - weight model-based prediction, pattern matching, and contradiction detection
222
- combined_sarcasm_score = 0.4 * model_score + 0.3 * pattern_score + 0.3 * contradiction_score
223
-
224
- # Normalize to range [0,1]
225
- return max(0, min(combined_sarcasm_score, 1))
 
 
226
 
227
  def analyze_emotions(text):
228
- """Analyze emotions in text using custom BERT-based approach with sarcasm detection"""
229
  if not text or not text.strip():
230
  return None, {"error": "Please enter some text to analyze"}
231
 
232
  try:
233
- # Templates for emotion detection
234
  emotion_scores = {}
235
 
236
- # Positive emotion indicator tokens
237
- positive_indicators = ['positive', 'strong', 'clear', 'definite', 'evident', 'genuine']
238
-
239
- # Negative indicators for contrasting emotions
240
- negative_indicators = ['negative', 'weak', 'unclear', 'slight', 'fake', 'absent']
241
-
242
- # For each emotion category
243
  for emotion, keywords in EMOTION_CATEGORIES.items():
244
  if emotion == 'sarcasm':
245
- # Special handling for sarcasm
246
- template = create_sarcasm_template()
247
- probs = predict_masked_token(text, template)
248
- emotion_scores[emotion] = get_sarcasm_score(text, probs)
249
  continue
250
 
251
- # Calculate score for each keyword and take average
252
- keyword_scores = []
253
-
254
- # Use a subset of keywords to improve efficiency
255
- selected_keywords = keywords[:10] # Use first 10 keywords
256
-
257
- for keyword in selected_keywords:
258
- template = create_emotion_template(keyword)
259
- probs = predict_masked_token(text, template)
260
- score = get_emotion_score(probs, positive_indicators, negative_indicators)
261
- keyword_scores.append(score)
262
-
263
- # Take average score across all keywords for this emotion
264
- emotion_scores[emotion] = sum(keyword_scores) / len(keyword_scores)
265
 
266
- # Normalize scores to ensure they sum to 1
267
- min_score = min(emotion_scores.values())
268
- max_score = max(emotion_scores.values())
269
- score_range = max_score - min_score
 
 
 
270
 
271
- if score_range > 0:
272
- # Normal case - we have a range of scores
273
- normalized_scores = {e: (s - min_score) / score_range for e, s in emotion_scores.items()}
274
- # Further normalize to sum to 1
275
- total = sum(normalized_scores.values())
276
- normalized_scores = {e: s / total for e, s in normalized_scores.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  else:
278
- # Edge case - all emotions scored the same
279
- normalized_scores = {e: 1/len(emotion_scores) for e in emotion_scores}
 
 
 
 
 
 
 
 
280
 
281
  # Sort emotions by score
282
- sorted_emotions = sorted(normalized_scores.items(), key=lambda x: x[1], reverse=True)
283
  emotions, scores = zip(*sorted_emotions)
284
 
285
  # Create visualization
@@ -292,9 +398,11 @@ def analyze_emotions(text):
292
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
293
  }
294
 
295
- # Add sarcasm note if detected with high confidence
296
- if 'sarcasm' in normalized_scores and normalized_scores['sarcasm'] > 0.2:
297
- output["note"] = f"Sarcasm detected with {normalized_scores['sarcasm']*100:.1f}% confidence"
 
 
298
 
299
  return fig, output
300
 
@@ -328,7 +436,7 @@ def create_visualization(emotions, scores, text=None):
328
  display_text = text if len(text) < 50 else text[:47] + "..."
329
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
330
  else:
331
- ax.set_title('Custom Emotion Analysis', pad=20)
332
 
333
  plt.tight_layout()
334
  return fig
@@ -345,10 +453,10 @@ demo = gr.Interface(
345
  gr.Plot(label="Emotion Distribution"),
346
  gr.JSON(label="Analysis Results")
347
  ],
348
- title="🧠 Enhanced Emotion Analysis with Sarcasm Detection",
349
- description="""This app analyzes emotions in text using a custom BERT-based approach.
350
- It examines how well the input text aligns with seven emotional categories: joy, sadness, anger, fear, surprise, love, and sarcasm.
351
- The analysis uses BERT's contextual understanding along with linguistic pattern recognition to evaluate emotional content.""",
352
  examples=[
353
  ["I can't wait for the concert tonight! It's going to be amazing!"],
354
  ["The news about the layoffs has left everyone feeling devastated."],
 
21
  from transformers import AutoModelForMaskedLM, AutoTokenizer
22
  import gradio as gr
23
  import re
24
+ from collections import Counter
25
 
26
+ print("Setting up BERT-based emotion analysis model...")
27
 
28
+ # Enhanced emotion categories with carefully selected keywords
29
  EMOTION_CATEGORIES = {
30
  'joy': [
31
+ 'happy', 'joyful', 'delighted', 'excited', 'cheerful',
32
+ 'glad', 'elated', 'jubilant', 'overjoyed', 'pleased'
 
33
  ],
34
  'sadness': [
35
+ 'sad', 'unhappy', 'depressed', 'disappointed', 'sorrowful',
36
+ 'heartbroken', 'melancholy', 'grief', 'somber', 'mournful'
 
37
  ],
38
  'anger': [
39
+ 'angry', 'furious', 'enraged', 'irritated', 'annoyed',
40
+ 'outraged', 'hostile', 'mad', 'infuriated', 'indignant'
 
41
  ],
42
  'fear': [
43
+ 'afraid', 'scared', 'frightened', 'terrified', 'anxious',
44
+ 'worried', 'nervous', 'panicked', 'horrified', 'apprehensive'
 
45
  ],
46
  'surprise': [
47
+ 'surprised', 'amazed', 'astonished', 'shocked', 'stunned',
48
+ 'startled', 'astounded', 'bewildered', 'unexpected', 'awestruck'
 
49
  ],
50
  'love': [
51
+ 'loving', 'affectionate', 'fond', 'adoring', 'caring',
52
+ 'devoted', 'passionate', 'tender', 'compassionate', 'cherishing'
 
53
  ],
54
  'sarcasm': [
55
+ 'sarcastic', 'ironic', 'mocking', 'cynical', 'satirical',
56
+ 'sardonic', 'facetious', 'contemptuous', 'caustic', 'biting'
 
57
  ]
58
  }
59
 
 
68
  'sarcasm': '#FF7F50' # Coral
69
  }
70
 
71
+ # Common positive and negative words for context analysis
72
+ POSITIVE_WORDS = ['great', 'good', 'wonderful', 'amazing', 'excellent', 'fantastic', 'terrific', 'perfect', 'lovely', 'awesome']
73
+ NEGATIVE_WORDS = ['bad', 'terrible', 'awful', 'horrible', 'poor', 'dreadful', 'disappointing', 'unpleasant', 'lousy', 'pathetic']
74
+
75
  # Load BERT model and tokenizer
76
  print("Loading BERT model and tokenizer (this may take a moment)...")
77
  model_name = "bert-base-uncased"
 
83
  model = model.to(device)
84
  print(f"Model loaded successfully. Using device: {device}")
85
 
86
+ # Sarcasm indicators - carefully revised linguistic patterns that indicate sarcasm
87
  SARCASM_PATTERNS = [
88
+ # Exaggerated expressions with specific punctuation/capitalization patterns
89
+ r'(?i)\b(?:so+|really|absolutely|totally|completely)\s+(?:thrilled|excited|happy|delighted)\s+(?:about|with|by)\b.*?(?:\!{2,}|\?{2,})',
90
+
91
+ # Classic sarcastic phrases
92
+ r'(?i)\bjust\s+what\s+(?:I|we)\s+(?:need|wanted|hoped for)\b',
93
+ r'(?i)\bhow\s+(?:wonderful|nice|great|lovely|exciting)\b.*?(?:\!|\?{2,})',
94
+
95
+ # Contrasting statements
96
+ r'(?i)\b(?:love|enjoy|adore)\b.*?\bnot\b',
97
+
98
+ # Quotation marks around positive words (scare quotes)
99
+ r'(?i)"(?:great|wonderful|excellent|perfect|amazing)"',
100
+
101
+ # Typical sarcastic responses
102
+ r'(?i)^\s*(?:yeah|sure|right)\s+(?:ok|okay|whatever)\b',
103
+
104
+ # Exaggerated praise in negative context
105
+ r'(?i)\b(?:brilliant|genius|impressive)\b.*?(?:terrible|awful|disaster|failure)',
106
+
107
+ # Obvious understatements
108
+ r'(?i)\bslightly\s+(?:catastrophic|disastrous|terrible|awful)\b',
109
+
110
+ # Emphasis on positive with hint of negative (requires context check)
111
+ r'(?i)\bso+\s+(?:happy|excited|thrilled|glad)'
112
  ]
113
 
114
def tokenize_and_clean(text):
    """Lowercase *text*, collapse runs of whitespace, and return its word tokens."""
    normalized = re.sub(r'\s+', ' ', text.lower().strip())
    # \b\w+\b keeps alphanumeric runs only, discarding punctuation.
    return re.findall(r'\b\w+\b', normalized)
122
+
123
def count_sentiment_words(text):
    """Return a (positive, negative) pair counting sentiment-lexicon hits in *text*."""
    words = tokenize_and_clean(text)
    # Booleans sum as 0/1, so these are plain membership counts.
    pos = sum(word in POSITIVE_WORDS for word in words)
    neg = sum(word in NEGATIVE_WORDS for word in words)
    return pos, neg
129
+
130
def detect_sarcasm_patterns(text):
    """Score sarcasm cues in *text* on [0, 1] from regex patterns plus context signals.

    Combined signals: regex pattern hits (0.15 each), mixed positive/negative
    sentiment words (up to 0.5), excessive ! / ? punctuation (flat 0.2), and
    ALL-CAPS words (0.1 each).  The raw sum is capped at 1.0.
    """
    # Count how many sarcasm regexes fire; each pattern contributes at most once.
    # (The original also collected the matched patterns into a list that was
    # never used — dead local removed.)
    matches = sum(1 for pattern in SARCASM_PATTERNS if re.search(pattern, text))

    # Mixed sentiment: both positive and negative lexicon words present.
    positive_count, negative_count = count_sentiment_words(text)
    sentiment_mismatch = 0
    if positive_count > 0 and negative_count > 0:
        sentiment_mismatch = min(positive_count, negative_count) / max(positive_count, negative_count, 1)

    # Excessive punctuation is a weak sarcasm cue.
    excessive_punctuation = 0
    if text.count('!') > 2 or text.count('?') > 2:
        excessive_punctuation = 0.2

    # ALL-CAPS emphasis (runs of 3+ capitals).  NOTE(review): the original
    # comment claimed common acronyms are excluded, but nothing filters them —
    # e.g. "USA" also adds weight here.
    caps_words = re.findall(r'\b[A-Z]{3,}\b', text)
    caps_emphasis = len(caps_words) * 0.1

    raw_score = (matches * 0.15) + (sentiment_mismatch * 0.5) + excessive_punctuation + caps_emphasis
    return min(raw_score, 1.0)
168
 
169
def detect_extreme_incongruity(text):
    """Return a [0, 1] score for sentiment/content incongruity (a sarcasm cue)."""
    positive_count, negative_count = count_sentiment_words(text)

    # Phrase pairs mixing praise with condemnation, each with its own weight.
    weighted_patterns = (
        (r'(?i)\b(?:love|adore|enjoy)\b.*?\b(?:hate|despise|detest)\b', 0.7),      # "I love how much I hate this"
        (r'(?i)\b(?:wonderful|great|excellent)\b.*?\b(?:terrible|awful|horrible)\b', 0.8),  # "What a wonderful disaster"
        (r'(?i)\b(?:thankful|grateful)\b.*?\b(?:worst|annoying|frustrating)\b', 0.6),       # "So thankful for this frustrating experience"
    )
    score = sum(weight for pattern, weight in weighted_patterns if re.search(pattern, text))

    # A heavy mix of both polarities is itself suspicious.
    if positive_count > 2 and negative_count > 2:
        score += 0.4

    return min(score, 1.0)
192
 
193
def create_emotion_template(emotion, keyword):
    """Return the masked-LM prompt used to probe *emotion*.

    The original built a four-element list of candidate templates on every
    call and always returned element 0, which does not use *keyword* at all.
    *keyword* is kept in the signature for caller compatibility (alternate
    templates would interpolate it), but only the emotion name is used.
    """
    return f"The text expresses [MASK] {emotion} emotions."
204
 
205
  def predict_masked_token(text, template):
206
  """Get predictions for a masked token using BERT"""
 
227
 
228
  return probs
229
 
230
def get_emotion_score(text, emotion, keywords):
    """Average BERT mask-fill evidence for *emotion* over up to 5 of its *keywords*.

    For each keyword, a template is built and the [MASK] probabilities of
    intensity words ("clearly", ...) minus hedging words ("not", ...) give the
    keyword's score; the mean over keywords is returned.

    Returns 0.0 for an empty *keywords* list (the original raised
    ZeroDivisionError in that case).
    """
    positive_indicators = ['clearly', 'definitely', 'strongly', 'very', 'extremely']
    negative_indicators = ['not', 'barely', 'hardly', 'slightly', 'somewhat']

    # Token ids do not depend on the keyword — compute once, not inside the
    # loop as the original did.
    positive_ids = [tokenizer.convert_tokens_to_ids(word) for word in positive_indicators]
    negative_ids = [tokenizer.convert_tokens_to_ids(word) for word in negative_indicators]

    # Use just 5 keywords per emotion for efficiency.
    selected = keywords[:5]
    if not selected:
        return 0.0

    keyword_scores = []
    for keyword in selected:
        template = create_emotion_template(emotion, keyword)
        probs = predict_masked_token(text, template)

        # Net evidence: intensity mass minus hedging mass at the [MASK] slot.
        positive_score = sum(probs[0, token_id].item() for token_id in positive_ids)
        negative_score = sum(probs[0, token_id].item() for token_id in negative_ids)
        keyword_scores.append(positive_score - negative_score)

    # Average score across the sampled keywords.
    return sum(keyword_scores) / len(keyword_scores)
258
 
259
def analyze_sarcasm(text):
    """Blend four sarcasm signals into a single [0, 1] score.

    Signals: (1) BERT fill-mask on a direct sarcasm template, (2) linguistic
    pattern matching, (3) sentiment-incongruity detection, and (4) BERT
    fill-mask on a writer-intent template.
    """
    # 1. Direct sarcasm template prediction.
    probs = predict_masked_token(text, "This text is [MASK] sarcastic.")
    intensifier_ids = [tokenizer.convert_tokens_to_ids(word) for word in
                       ['definitely', 'very', 'extremely', 'clearly', 'obviously']]
    hedge_ids = [tokenizer.convert_tokens_to_ids(word) for word in
                 ['not', 'barely', 'hardly', 'slightly', 'somewhat']]
    bert_score = (sum(probs[0, tid].item() for tid in intensifier_ids)
                  - sum(probs[0, tid].item() for tid in hedge_ids))

    # 2. Linguistic pattern detection.
    pattern_score = detect_sarcasm_patterns(text)

    # 3. Sentiment incongruity detection.
    incongruity_score = detect_extreme_incongruity(text)

    # 4. Intent probe: does the model read the writer as sarcastic or sincere?
    intent_probs = predict_masked_token(text, "The writer's intent is [MASK].")
    sarcastic_ids = [tokenizer.convert_tokens_to_ids(word) for word in
                     ['sarcastic', 'ironic', 'mocking', 'joking']]
    sincere_ids = [tokenizer.convert_tokens_to_ids(word) for word in
                   ['sincere', 'serious', 'honest', 'genuine']]
    intent_score = (sum(intent_probs[0, tid].item() for tid in sarcastic_ids)
                    - sum(intent_probs[0, tid].item() for tid in sincere_ids))

    # Weighted combination of all scores, clamped to [0, 1].
    combined = (0.3 * bert_score) + (0.3 * pattern_score) + \
               (0.2 * incongruity_score) + (0.2 * intent_score)
    return max(0, min(combined, 1))
298
+
299
def get_confidence_adjustment(text, emotion_scores):
    """Return a confidence multiplier derived from text/score characteristics.

    Averages three factors: text length (under 20 words scales down linearly),
    peak emotion intensity, and the gap between the two highest emotion scores
    (a narrow gap means the reading is ambiguous).
    """
    # Very short texts are harder to analyze reliably.
    word_count = len(text.split())
    length_factor = min(word_count / 20, 1.0)

    # The strongest emotion's score doubles as an intensity signal.
    intensity_factor = max(emotion_scores.values())

    # Ambiguity: a small lead for the top emotion lowers confidence.
    ranked = sorted(emotion_scores.values(), reverse=True)
    if len(ranked) > 1:
        ambiguity_factor = min((ranked[0] - ranked[1]) * 2, 1.0)
    else:
        ambiguity_factor = 1.0

    return (length_factor + intensity_factor + ambiguity_factor) / 3
321
 
322
  def analyze_emotions(text):
323
+ """Analyze emotions in text using improved BERT-based approach with robust sarcasm detection"""
324
  if not text or not text.strip():
325
  return None, {"error": "Please enter some text to analyze"}
326
 
327
  try:
328
+ # Calculate raw scores for each emotion
329
  emotion_scores = {}
330
 
331
+ # For each standard emotion category (excluding sarcasm)
 
 
 
 
 
 
332
  for emotion, keywords in EMOTION_CATEGORIES.items():
333
  if emotion == 'sarcasm':
 
 
 
 
334
  continue
335
 
336
+ # Use specialized function to get emotion score
337
+ emotion_scores[emotion] = get_emotion_score(text, emotion, keywords)
338
+
339
+ # Special handling for sarcasm with multi-method approach
340
+ emotion_scores['sarcasm'] = analyze_sarcasm(text)
341
+
342
+ # Get confidence adjustment factor based on text characteristics
343
+ confidence_adjustment = get_confidence_adjustment(text, emotion_scores)
344
+
345
+ # Apply chain-of-thought decision making for final analysis
346
+ final_scores = {}
 
 
 
347
 
348
+ # Step 1: Look for extremely high sarcasm score - this can override other emotions
349
+ if emotion_scores['sarcasm'] > 0.7:
350
+ # High sarcasm detected - reduce emotional scores
351
+ for emotion in emotion_scores:
352
+ if emotion != 'sarcasm':
353
+ # Reduce other emotions based on sarcasm strength
354
+ emotion_scores[emotion] *= (1 - (emotion_scores['sarcasm'] * 0.5))
355
 
356
+ # Step 2: If sarcasm score is moderate (0.3-0.7), maintain other emotions but boost sarcasm
357
+ elif emotion_scores['sarcasm'] > 0.3:
358
+ # Moderate sarcasm - keep as complementary emotion
359
+ emotion_scores['sarcasm'] *= 1.2 # Slight boost to ensure it's noticed
360
+
361
+ # Step 3: If sarcasm score is low, reduce it further
362
+ else:
363
+ emotion_scores['sarcasm'] *= 0.8 # Reduce low sarcasm scores to avoid false positives
364
+
365
+ # Step 4: Check for emotional extremes that could override sarcasm
366
+ max_emotion = max(emotion_scores.items(), key=lambda x: x[1] if x[0] != 'sarcasm' else 0)
367
+ if max_emotion[1] > 0.7 and max_emotion[0] != 'sarcasm':
368
+ # Strong emotion detected - this could reduce sarcasm
369
+ emotion_scores['sarcasm'] *= 0.8
370
+
371
+ # Step 5: Normalize scores to ensure they sum to 1
372
+ total_score = sum(emotion_scores.values())
373
+ if total_score > 0:
374
+ final_scores = {emotion: score / total_score for emotion, score in emotion_scores.items()}
375
  else:
376
+ # Fallback if all scores are zero
377
+ final_scores = {emotion: 1/len(emotion_scores) for emotion in emotion_scores}
378
+
379
+ # Apply confidence adjustment
380
+ final_scores = {emotion: score * confidence_adjustment for emotion, score in final_scores.items()}
381
+
382
+ # Normalize again after adjustment
383
+ total_adjusted = sum(final_scores.values())
384
+ if total_adjusted > 0:
385
+ final_scores = {emotion: score / total_adjusted for emotion, score in final_scores.items()}
386
 
387
  # Sort emotions by score
388
+ sorted_emotions = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)
389
  emotions, scores = zip(*sorted_emotions)
390
 
391
  # Create visualization
 
398
  "detailed_scores": {emotion: f"{score*100:.1f}%" for emotion, score in zip(emotions, scores)}
399
  }
400
 
401
+ # Add contextual notes if applicable
402
+ if emotions[0] == 'sarcasm' and scores[0] > 0.3:
403
+ output["note"] = f"Sarcasm detected with {scores[0]*100:.1f}% confidence. Context suggests ironic or mocking tone."
404
+ elif 'sarcasm' in final_scores and final_scores['sarcasm'] > 0.2:
405
+ output["note"] = f"Some sarcastic elements detected alongside {emotions[0]}."
406
 
407
  return fig, output
408
 
 
436
  display_text = text if len(text) < 50 else text[:47] + "..."
437
  ax.set_title(f'Emotion Analysis: "{display_text}"', pad=20)
438
  else:
439
+ ax.set_title('BERT-based Emotion Analysis', pad=20)
440
 
441
  plt.tight_layout()
442
  return fig
 
453
  gr.Plot(label="Emotion Distribution"),
454
  gr.JSON(label="Analysis Results")
455
  ],
456
+ title="🧠 BERT-based Emotion Analysis",
457
+ description="""This app analyzes emotions in text using a specialized BERT-based approach.
458
+ It identifies how well the input text aligns with seven emotional categories: joy, sadness, anger, fear, surprise, love, and sarcasm.
459
+ The analysis leverages BERT's contextual understanding along with sophisticated pattern recognition to evaluate emotional content.""",
460
  examples=[
461
  ["I can't wait for the concert tonight! It's going to be amazing!"],
462
  ["The news about the layoffs has left everyone feeling devastated."],