rocky250 committed on
Commit
1de7668
·
verified ·
1 Parent(s): 241dd67

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +19 -17
analyzer.py CHANGED
@@ -728,12 +728,12 @@ def analyze_sentiment_batch(
728
  return _simple_lexicon_sentiment(texts)
729
  for text in texts:
730
  if not text or len(text.strip()) < 3:
731
- results.append({"label": "NEUTRAL", "score": 0.0, "compound": 0.0})
732
  continue
733
  vs = vader.polarity_scores(text)
734
  c = vs["compound"]
735
  results.append({
736
- "label": "POSITIVELY ENGAGEMENT" if c >= 0.05 else ("NEGATIVELY ENGAGEMENT" if c <= -0.05 else "NEUTRAL"),
737
  "score": abs(c),
738
  "compound": c,
739
  })
@@ -743,15 +743,17 @@ def analyze_sentiment_batch(
743
  chunk = texts[i: i + batch_size]
744
  safe = [t[:1000] if t else " " for t in chunk]
745
  try:
 
746
  for r in pipe(safe):
 
747
  results.append({
748
- "label": r["label"],
749
  "score": round(r["score"], 4),
750
  "compound": r["score"] if r["label"] == "POSITIVE" else -r["score"],
751
  })
752
  except Exception:
753
  for _ in chunk:
754
- results.append({"label": "NEUTRAL", "score": 0.5, "compound": 0.0})
755
  return results
756
 
757
 
@@ -762,28 +764,28 @@ def _simple_lexicon_sentiment(texts: List[str]) -> List[Dict]:
762
  for text in texts:
763
  words = set(text.lower().split())
764
  p = len(words & pos); n = len(words & neg)
765
- if p > n: out.append({"label": "POSITIVELY ENGAGEMENT", "score": 0.7, "compound": 0.5})
766
- elif n > p: out.append({"label": "NEGATIVE ENGAGEMENT", "score": 0.7, "compound": -0.5})
767
- else: out.append({"label": "NEUTRAL", "score": 0.5, "compound": 0.0})
768
  return out
769
 
770
 
771
  def sentiment_summary(results: List[Dict]) -> Dict:
772
  if not results:
773
- return {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0, "total": 0,
774
- "avg_compound": 0.0, "pos_pct": 0, "neg_pct": 0, "neu_pct": 0}
775
  counts = Counter(r["label"] for r in results)
776
  total = len(results)
777
  avg = float(np.mean([r.get("compound", 0.0) for r in results]))
778
  return {
779
- "POSITIVELY ENGAGEMENT": counts.get("POSITIVE", 0),
780
- "NEGATIVELY ENGAGEMENT": counts.get("NEGATIVE", 0),
781
- "NEUTRAL": counts.get("NEUTRAL", 0),
782
  "total": total,
783
  "avg_compound": round(avg, 3),
784
- "pos_pct": round(counts.get("POSITIVE", 0) / total * 100, 1),
785
- "neg_pct": round(counts.get("NEGATIVE", 0) / total * 100, 1),
786
- "neu_pct": round(counts.get("NEUTRAL", 0) / total * 100, 1),
787
  }
788
 
789
 
@@ -822,6 +824,6 @@ def sentiment_weighted_keywords(
822
  for text, sent in zip(texts, sentiment_results):
823
  tokens = [t for t in re.findall(r"[a-zA-Z]{3,}", text.lower()) if t not in STOPWORDS]
824
  weight = sent.get("score", 0.5)
825
- if sent["label"] == "POSITIVELY ENGAGEMENT": pos_freq.update({t: weight for t in tokens})
826
- elif sent["label"] == "NEGATIVELY ENGAGEMENT": neg_freq.update({t: weight for t in tokens})
827
  return pos_freq.most_common(top_n), neg_freq.most_common(top_n)
 
728
  return _simple_lexicon_sentiment(texts)
729
  for text in texts:
730
  if not text or len(text.strip()) < 3:
731
+ results.append({"label": "Neutral", "score": 0.0, "compound": 0.0})
732
  continue
733
  vs = vader.polarity_scores(text)
734
  c = vs["compound"]
735
  results.append({
736
+ "label": "Positively Engagement" if c >= 0.05 else ("Negatively Engagement" if c <= -0.05 else "Neutral"),
737
  "score": abs(c),
738
  "compound": c,
739
  })
 
743
  chunk = texts[i: i + batch_size]
744
  safe = [t[:1000] if t else " " for t in chunk]
745
  try:
746
+ _hf_label_map = {"POSITIVE": "Positively Engagement", "NEGATIVE": "Negatively Engagement"}
747
  for r in pipe(safe):
748
+ mapped = _hf_label_map.get(r["label"], "Neutral")
749
  results.append({
750
+ "label": mapped,
751
  "score": round(r["score"], 4),
752
  "compound": r["score"] if r["label"] == "POSITIVE" else -r["score"],
753
  })
754
  except Exception:
755
  for _ in chunk:
756
+ results.append({"label": "Neutral", "score": 0.5, "compound": 0.0})
757
  return results
758
 
759
 
 
764
  for text in texts:
765
  words = set(text.lower().split())
766
  p = len(words & pos); n = len(words & neg)
767
+ if p > n: out.append({"label": "Positively Engagement", "score": 0.7, "compound": 0.5})
768
+ elif n > p: out.append({"label": "Negatively Engagement", "score": 0.7, "compound": -0.5})
769
+ else: out.append({"label": "Neutral", "score": 0.5, "compound": 0.0})
770
  return out
771
 
772
 
773
  def sentiment_summary(results: List[Dict]) -> Dict:
774
  if not results:
775
+ return {"Positively Engagement": 0, "Negatively Engagement": 0, "Neutral": 0,
776
+ "total": 0, "avg_compound": 0.0, "pos_pct": 0, "neg_pct": 0, "neu_pct": 0}
777
  counts = Counter(r["label"] for r in results)
778
  total = len(results)
779
  avg = float(np.mean([r.get("compound", 0.0) for r in results]))
780
  return {
781
+ "Positively Engagement": counts.get("Positively Engagement", 0),
782
+ "Negatively Engagement": counts.get("Negatively Engagement", 0),
783
+ "Neutral": counts.get("Neutral", 0),
784
  "total": total,
785
  "avg_compound": round(avg, 3),
786
+ "pos_pct": round(counts.get("Positively Engagement", 0) / total * 100, 1),
787
+ "neg_pct": round(counts.get("Negatively Engagement", 0) / total * 100, 1),
788
+ "neu_pct": round(counts.get("Neutral", 0) / total * 100, 1),
789
  }
790
 
791
 
 
824
  for text, sent in zip(texts, sentiment_results):
825
  tokens = [t for t in re.findall(r"[a-zA-Z]{3,}", text.lower()) if t not in STOPWORDS]
826
  weight = sent.get("score", 0.5)
827
+ if sent["label"] == "Positively Engagement": pos_freq.update({t: weight for t in tokens})
828
+ elif sent["label"] == "Negatively Engagement": neg_freq.update({t: weight for t in tokens})
829
  return pos_freq.most_common(top_n), neg_freq.most_common(top_n)