Upload sentiment_analyzer.py
Browse files- sentiment_analyzer.py +22 -11
sentiment_analyzer.py
CHANGED
|
@@ -180,13 +180,15 @@ class LexiconBasedAnalyzer:
|
|
| 180 |
negation_count += 1
|
| 181 |
break
|
| 182 |
|
| 183 |
-
# Check for intensifiers
|
| 184 |
-
|
| 185 |
-
|
|
|
|
| 186 |
|
| 187 |
-
# Check for diminishers
|
| 188 |
-
|
| 189 |
-
|
|
|
|
| 190 |
|
| 191 |
# Check sentiment
|
| 192 |
if token in self.lexicon.positive_words:
|
|
@@ -208,17 +210,26 @@ class LexiconBasedAnalyzer:
|
|
| 208 |
|
| 209 |
i += 1
|
| 210 |
|
| 211 |
-
# Calculate final sentiment
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
polarity = 'neutral'
|
| 215 |
confidence = 0.0
|
| 216 |
elif positive_score > negative_score:
|
| 217 |
polarity = 'positive'
|
| 218 |
-
confidence =
|
| 219 |
else:
|
| 220 |
polarity = 'negative'
|
| 221 |
-
confidence =
|
| 222 |
|
| 223 |
return {
|
| 224 |
'polarity': polarity,
|
|
|
|
| 180 |
negation_count += 1
|
| 181 |
break
|
| 182 |
|
| 183 |
+
# Check for intensifiers (look back up to 2 tokens)
|
| 184 |
+
for k in range(max(0, i-2), i):
|
| 185 |
+
if k >= 0 and tokens[k] in self.lexicon.intensifiers:
|
| 186 |
+
intensifier_strength = max(intensifier_strength, self.lexicon.intensifiers[tokens[k]])
|
| 187 |
|
| 188 |
+
# Check for diminishers (look back up to 2 tokens)
|
| 189 |
+
for k in range(max(0, i-2), i):
|
| 190 |
+
if k >= 0 and tokens[k] in self.lexicon.diminishers:
|
| 191 |
+
diminisher_strength = min(diminisher_strength, self.lexicon.diminishers[tokens[k]])
|
| 192 |
|
| 193 |
# Check sentiment
|
| 194 |
if token in self.lexicon.positive_words:
|
|
|
|
| 210 |
|
| 211 |
i += 1
|
| 212 |
|
| 213 |
+
# Calculate final sentiment with improved scoring
|
| 214 |
+
# Normalize scores to prevent extreme values from dominating
|
| 215 |
+
total_raw = positive_score + negative_score
|
| 216 |
+
if total_raw > 0:
|
| 217 |
+
# Use proportional scaling (each score's share of the total) for better balance (but keep original for display)
|
| 218 |
+
pos_normalized = positive_score / total_raw
|
| 219 |
+
neg_normalized = negative_score / total_raw
|
| 220 |
+
else:
|
| 221 |
+
pos_normalized = 0.0
|
| 222 |
+
neg_normalized = 0.0
|
| 223 |
+
|
| 224 |
+
if total_raw == 0:
|
| 225 |
polarity = 'neutral'
|
| 226 |
confidence = 0.0
|
| 227 |
elif positive_score > negative_score:
|
| 228 |
polarity = 'positive'
|
| 229 |
+
confidence = pos_normalized
|
| 230 |
else:
|
| 231 |
polarity = 'negative'
|
| 232 |
+
confidence = neg_normalized
|
| 233 |
|
| 234 |
return {
|
| 235 |
'polarity': polarity,
|