""" PhilVerify — Sentiment & Emotion Analyzer Uses HuggingFace transformers with graceful fallback to lexicon-based scoring. """ import logging from dataclasses import dataclass logger = logging.getLogger(__name__) # ── Simple lexicons for fallback ────────────────────────────────────────────── _NEGATIVE_WORDS = { "fake", "false", "lie", "liar", "hoax", "scam", "fraud", "corrupt", "criminal", "illegal", "murder", "die", "death", "dead", "kill", "patay", "namatay", "peke", "sinungaling", "corrupt", "magnanakaw", "kasamaan", "krimen", "karahasan", "pandemic", "sakit", "epidemya", "grabe", "nakakatakot", "nakakainis", "nakakagalit", "kahiya", } _POSITIVE_WORDS = { "good", "great", "excellent", "amazing", "wonderful", "positive", "success", "win", "victory", "help", "support", "safe", "free", "maganda", "magaling", "mahusay", "maayos", "tagumpay", "ligtas", "masaya", "mabuti", "mahalaga", "mahal", "salamat", "pagbabago", } _FEAR_WORDS = { "takot", "fear", "scared", "afraid", "terror", "danger", "dangerous", "banta", "panganib", "nakakatakot", "kalamidad", "lindol", } _ANGER_WORDS = { "galit", "angry", "anger", "furious", "rage", "outrage", "poot", "nakakagalit", "nakakaasar", "sumpain", "putang", "gago", } @dataclass class SentimentResult: sentiment: str # positive | negative | neutral | high positive | high negative sentiment_score: float # -1.0 to 1.0 emotion: str # anger | fear | joy | sadness | neutral emotion_score: float # 0.0 to 1.0 method: str # "transformer" | "lexicon" class SentimentAnalyzer: """ Two-strategy sentiment analysis: Primary — cardiffnlp/twitter-roberta-base-sentiment-latest (social media optimized) Fallback — lexicon-based word counting """ def __init__(self): self._sentiment_pipe = None self._emotion_pipe = None self._loaded = False def _load_models(self): if self._loaded: return try: from transformers import pipeline self._sentiment_pipe = pipeline( "text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest", top_k=1, ) self._emotion_pipe = pipeline( "text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1, ) logger.info("Sentiment / emotion models loaded") except Exception as e: logger.warning("Transformer models not available (%s) — using lexicon fallback", e) self._loaded = True def _lexicon_analyze(self, text: str) -> SentimentResult: words = set(text.lower().split()) neg = len(words & _NEGATIVE_WORDS) pos = len(words & _POSITIVE_WORDS) fear = len(words & _FEAR_WORDS) anger = len(words & _ANGER_WORDS) total = neg + pos if total == 0: score = 0.0 else: score = (pos - neg) / total if score > 0.3: sentiment = "high positive" if score > 0.6 else "positive" elif score < -0.3: sentiment = "high negative" if score < -0.6 else "negative" else: sentiment = "neutral" emotion_score = 0.0 if fear > anger: emotion = "fear" emotion_score = min(fear / max(len(words), 1) * 5, 1.0) elif anger > 0: emotion = "anger" emotion_score = min(anger / max(len(words), 1) * 5, 1.0) elif pos > neg: emotion = "joy" emotion_score = min(pos / max(len(words), 1) * 5, 1.0) elif neg > 0: emotion = "sadness" emotion_score = min(neg / max(len(words), 1) * 5, 1.0) else: emotion = "neutral" emotion_score = 0.0 return SentimentResult(sentiment, round(score, 3), emotion, round(emotion_score, 3), "lexicon") def analyze(self, text: str) -> SentimentResult: self._load_models() snippet = text[:512] # Transformer token limit if self._sentiment_pipe and self._emotion_pipe: try: s_out = self._sentiment_pipe(snippet)[0] e_out = self._emotion_pipe(snippet)[0] raw_label = s_out["label"].lower() score = s_out["score"] if "positive" in raw_label: sentiment = "high positive" if score > 0.85 else "positive" s_score = score elif "negative" in raw_label: sentiment = "high negative" if score > 0.85 else "negative" s_score = -score else: sentiment = "neutral" s_score = 0.0 emotion = e_out["label"].lower() emotion_score = e_out["score"] return SentimentResult(sentiment, round(s_score, 3), emotion, round(emotion_score, 3), "transformer") except Exception as e: logger.warning("Transformer inference error: %s — falling back to lexicon", e) return self._lexicon_analyze(text)