Spaces:
Sleeping
Sleeping
Update analyzer.py
Browse files- analyzer.py +19 -17
analyzer.py
CHANGED
|
@@ -728,12 +728,12 @@ def analyze_sentiment_batch(
|
|
| 728 |
return _simple_lexicon_sentiment(texts)
|
| 729 |
for text in texts:
|
| 730 |
if not text or len(text.strip()) < 3:
|
| 731 |
-
results.append({"label": "
|
| 732 |
continue
|
| 733 |
vs = vader.polarity_scores(text)
|
| 734 |
c = vs["compound"]
|
| 735 |
results.append({
|
| 736 |
-
"label": "
|
| 737 |
"score": abs(c),
|
| 738 |
"compound": c,
|
| 739 |
})
|
|
@@ -743,15 +743,17 @@ def analyze_sentiment_batch(
|
|
| 743 |
chunk = texts[i: i + batch_size]
|
| 744 |
safe = [t[:1000] if t else " " for t in chunk]
|
| 745 |
try:
|
|
|
|
| 746 |
for r in pipe(safe):
|
|
|
|
| 747 |
results.append({
|
| 748 |
-
"label":
|
| 749 |
"score": round(r["score"], 4),
|
| 750 |
"compound": r["score"] if r["label"] == "POSITIVE" else -r["score"],
|
| 751 |
})
|
| 752 |
except Exception:
|
| 753 |
for _ in chunk:
|
| 754 |
-
results.append({"label": "
|
| 755 |
return results
|
| 756 |
|
| 757 |
|
|
@@ -762,28 +764,28 @@ def _simple_lexicon_sentiment(texts: List[str]) -> List[Dict]:
|
|
| 762 |
for text in texts:
|
| 763 |
words = set(text.lower().split())
|
| 764 |
p = len(words & pos); n = len(words & neg)
|
| 765 |
-
if p > n: out.append({"label": "
|
| 766 |
-
elif n > p: out.append({"label": "
|
| 767 |
-
else: out.append({"label": "
|
| 768 |
return out
|
| 769 |
|
| 770 |
|
| 771 |
def sentiment_summary(results: List[Dict]) -> Dict:
|
| 772 |
if not results:
|
| 773 |
-
return {"
|
| 774 |
-
"avg_compound": 0.0, "pos_pct": 0, "neg_pct": 0, "neu_pct": 0}
|
| 775 |
counts = Counter(r["label"] for r in results)
|
| 776 |
total = len(results)
|
| 777 |
avg = float(np.mean([r.get("compound", 0.0) for r in results]))
|
| 778 |
return {
|
| 779 |
-
"
|
| 780 |
-
"
|
| 781 |
-
"
|
| 782 |
"total": total,
|
| 783 |
"avg_compound": round(avg, 3),
|
| 784 |
-
"pos_pct": round(counts.get("
|
| 785 |
-
"neg_pct": round(counts.get("
|
| 786 |
-
"neu_pct": round(counts.get("
|
| 787 |
}
|
| 788 |
|
| 789 |
|
|
@@ -822,6 +824,6 @@ def sentiment_weighted_keywords(
|
|
| 822 |
for text, sent in zip(texts, sentiment_results):
|
| 823 |
tokens = [t for t in re.findall(r"[a-zA-Z]{3,}", text.lower()) if t not in STOPWORDS]
|
| 824 |
weight = sent.get("score", 0.5)
|
| 825 |
-
if sent["label"] == "
|
| 826 |
-
elif sent["label"] == "
|
| 827 |
return pos_freq.most_common(top_n), neg_freq.most_common(top_n)
|
|
|
|
| 728 |
return _simple_lexicon_sentiment(texts)
|
| 729 |
for text in texts:
|
| 730 |
if not text or len(text.strip()) < 3:
|
| 731 |
+
results.append({"label": "Neutral", "score": 0.0, "compound": 0.0})
|
| 732 |
continue
|
| 733 |
vs = vader.polarity_scores(text)
|
| 734 |
c = vs["compound"]
|
| 735 |
results.append({
|
| 736 |
+
"label": "Positively Engagement" if c >= 0.05 else ("Negatively Engagement" if c <= -0.05 else "Neutral"),
|
| 737 |
"score": abs(c),
|
| 738 |
"compound": c,
|
| 739 |
})
|
|
|
|
| 743 |
chunk = texts[i: i + batch_size]
|
| 744 |
safe = [t[:1000] if t else " " for t in chunk]
|
| 745 |
try:
|
| 746 |
+
_hf_label_map = {"POSITIVE": "Positively Engagement", "NEGATIVE": "Negatively Engagement"}
|
| 747 |
for r in pipe(safe):
|
| 748 |
+
mapped = _hf_label_map.get(r["label"], "Neutral")
|
| 749 |
results.append({
|
| 750 |
+
"label": mapped,
|
| 751 |
"score": round(r["score"], 4),
|
| 752 |
"compound": r["score"] if r["label"] == "POSITIVE" else -r["score"],
|
| 753 |
})
|
| 754 |
except Exception:
|
| 755 |
for _ in chunk:
|
| 756 |
+
results.append({"label": "Neutral", "score": 0.5, "compound": 0.0})
|
| 757 |
return results
|
| 758 |
|
| 759 |
|
|
|
|
| 764 |
for text in texts:
|
| 765 |
words = set(text.lower().split())
|
| 766 |
p = len(words & pos); n = len(words & neg)
|
| 767 |
+
if p > n: out.append({"label": "Positively Engagement", "score": 0.7, "compound": 0.5})
|
| 768 |
+
elif n > p: out.append({"label": "Negatively Engagement", "score": 0.7, "compound": -0.5})
|
| 769 |
+
else: out.append({"label": "Neutral", "score": 0.5, "compound": 0.0})
|
| 770 |
return out
|
| 771 |
|
| 772 |
|
| 773 |
def sentiment_summary(results: List[Dict]) -> Dict:
|
| 774 |
if not results:
|
| 775 |
+
return {"Positively Engagement": 0, "Negatively Engagement": 0, "Neutral": 0,
|
| 776 |
+
"total": 0, "avg_compound": 0.0, "pos_pct": 0, "neg_pct": 0, "neu_pct": 0}
|
| 777 |
counts = Counter(r["label"] for r in results)
|
| 778 |
total = len(results)
|
| 779 |
avg = float(np.mean([r.get("compound", 0.0) for r in results]))
|
| 780 |
return {
|
| 781 |
+
"Positively Engagement": counts.get("Positively Engagement", 0),
|
| 782 |
+
"Negatively Engagement": counts.get("Negatively Engagement", 0),
|
| 783 |
+
"Neutral": counts.get("Neutral", 0),
|
| 784 |
"total": total,
|
| 785 |
"avg_compound": round(avg, 3),
|
| 786 |
+
"pos_pct": round(counts.get("Positively Engagement", 0) / total * 100, 1),
|
| 787 |
+
"neg_pct": round(counts.get("Negatively Engagement", 0) / total * 100, 1),
|
| 788 |
+
"neu_pct": round(counts.get("Neutral", 0) / total * 100, 1),
|
| 789 |
}
|
| 790 |
|
| 791 |
|
|
|
|
| 824 |
for text, sent in zip(texts, sentiment_results):
|
| 825 |
tokens = [t for t in re.findall(r"[a-zA-Z]{3,}", text.lower()) if t not in STOPWORDS]
|
| 826 |
weight = sent.get("score", 0.5)
|
| 827 |
+
if sent["label"] == "Positively Engagement": pos_freq.update({t: weight for t in tokens})
|
| 828 |
+
elif sent["label"] == "Negatively Engagement": neg_freq.update({t: weight for t in tokens})
|
| 829 |
return pos_freq.most_common(top_n), neg_freq.most_common(top_n)
|