philverify-api / nlp /sentiment.py
Ryan Christian D. Deniega
feat: PhilVerify Phase 1-3 — FastAPI backend, NLP pipeline, TF-IDF classifier (23/23 tests)
6c9b8f1
"""
PhilVerify — Sentiment & Emotion Analyzer
Uses HuggingFace transformers with graceful fallback to lexicon-based scoring.
"""
import logging
import re
from dataclasses import dataclass
logger = logging.getLogger(__name__)

# ── Fallback lexicons ─────────────────────────────────────────────────────────
# Lower-case cue words that the lexicon fallback intersects with the set of
# words found in the input text. Every set mixes English and Filipino entries;
# within each language group the words are listed alphabetically.

# Words counted as negative polarity.
_NEGATIVE_WORDS = {
    # English
    "corrupt", "criminal", "dead", "death", "die", "fake", "false", "fraud",
    "hoax", "illegal", "kill", "liar", "lie", "murder", "pandemic", "scam",
    # Filipino
    "epidemya", "grabe", "kahiya", "karahasan", "kasamaan", "krimen",
    "magnanakaw", "nakakagalit", "nakakainis", "nakakatakot", "namatay",
    "patay", "peke", "sakit", "sinungaling",
}

# Words counted as positive polarity.
_POSITIVE_WORDS = {
    # English
    "amazing", "excellent", "free", "good", "great", "help", "positive",
    "safe", "success", "support", "victory", "win", "wonderful",
    # Filipino
    "ligtas", "maayos", "mabuti", "magaling", "maganda", "mahal", "mahalaga",
    "mahusay", "masaya", "pagbabago", "salamat", "tagumpay",
}

# Words that signal fear.
_FEAR_WORDS = {
    # English
    "afraid", "danger", "dangerous", "fear", "scared", "terror",
    # Filipino
    "banta", "kalamidad", "lindol", "nakakatakot", "panganib", "takot",
}

# Words that signal anger.
_ANGER_WORDS = {
    # English
    "anger", "angry", "furious", "outrage", "rage",
    # Filipino
    "gago", "galit", "nakakaasar", "nakakagalit", "poot", "putang", "sumpain",
}
@dataclass
class SentimentResult:
    """
    Outcome of one sentiment / emotion analysis.

    Attributes:
        sentiment: Polarity bucket, one of "positive", "negative", "neutral",
            "high positive" or "high negative".
        sentiment_score: Signed polarity in the range -1.0 to 1.0.
        emotion: Dominant emotion label. The lexicon path yields "anger",
            "fear", "joy", "sadness" or "neutral"; the transformer path passes
            through the emotion model's own label, lower-cased.
        emotion_score: Strength of that emotion, 0.0 to 1.0.
        method: Which strategy produced the result: "transformer" or "lexicon".
    """

    sentiment: str
    sentiment_score: float
    emotion: str
    emotion_score: float
    method: str
class SentimentAnalyzer:
    """
    Two-strategy sentiment and emotion analysis.

    Primary  — HuggingFace pipelines:
               cardiffnlp/twitter-roberta-base-sentiment-latest for sentiment and
               j-hartmann/emotion-english-distilroberta-base for emotion.
    Fallback — lexicon-based word counting, used when the models cannot be
               loaded or when inference raises.

    Models are loaded lazily on the first ``analyze`` call. A failed load is
    remembered and not retried, so later calls go straight to the lexicon.
    """

    # Word tokens only: punctuation is dropped so "fake!" or "#hoax" still
    # match the lexicon entries "fake" and "hoax".
    _TOKEN_RE = re.compile(r"\w+")
    # Cheap character-level pre-trim applied before inference.
    _MAX_CHARS = 512
    # Token cap passed to the tokenizer; the character trim alone does not
    # bound the number of tokens.
    _MAX_TOKENS = 512
    # Transformer confidence above which a label is promoted to "high ...".
    _HIGH_CONFIDENCE = 0.85

    def __init__(self):
        self._sentiment_pipe = None
        self._emotion_pipe = None
        self._loaded = False  # True once a load has been attempted

    # ── Helpers ───────────────────────────────────────────────────────────────
    @classmethod
    def _tokenize(cls, text: str) -> set:
        """Return the set of distinct lower-cased word tokens in *text*."""
        return set(cls._TOKEN_RE.findall(text.lower()))

    @staticmethod
    def _top_prediction(output):
        """
        Unwrap a pipeline result down to its first prediction dict.

        Depending on the transformers version and how ``top_k`` was supplied,
        a single-string call may come back as a dict, ``[dict]`` or
        ``[[dict]]``; indexing a fixed depth can leave a list where a dict is
        expected. An empty list raises IndexError, which ``analyze`` treats
        like any other inference failure.
        """
        while isinstance(output, (list, tuple)):
            output = output[0]
        return output

    @classmethod
    def _label_sentiment(cls, raw_label: str, confidence: float) -> tuple:
        """
        Map a model label and confidence to ``(sentiment, signed_score)``.

        Positive labels keep the confidence as the score, negative labels
        negate it, and anything else is reported as neutral with score 0.0.
        """
        label = raw_label.lower()
        if "positive" in label:
            bucket = "high positive" if confidence > cls._HIGH_CONFIDENCE else "positive"
            return bucket, confidence
        if "negative" in label:
            bucket = "high negative" if confidence > cls._HIGH_CONFIDENCE else "negative"
            return bucket, -confidence
        return "neutral", 0.0

    # ── Model loading ─────────────────────────────────────────────────────────
    def _load_models(self):
        """Load both pipelines once; on any failure keep the lexicon fallback."""
        if self._loaded:
            return
        try:
            # Imported lazily so the module works without transformers installed.
            from transformers import pipeline

            self._sentiment_pipe = pipeline(
                "text-classification",
                model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                top_k=1,
            )
            self._emotion_pipe = pipeline(
                "text-classification",
                model="j-hartmann/emotion-english-distilroberta-base",
                top_k=1,
            )
            logger.info("Sentiment / emotion models loaded")
        except Exception as e:
            # Deliberately broad: a missing package, a failed download or a
            # backend error all mean "use the lexicon". Drop a half-loaded
            # pair, since analyze() needs both pipelines anyway.
            self._sentiment_pipe = None
            self._emotion_pipe = None
            logger.warning("Transformer models not available (%s) — using lexicon fallback", e)
        self._loaded = True

    # ── Lexicon strategy ──────────────────────────────────────────────────────
    def _lexicon_analyze(self, text: str) -> "SentimentResult":
        """
        Score *text* by counting distinct lexicon words.

        The sentiment score is ``(pos - neg) / (pos + neg)``, or 0.0 when no
        polarity word is present. The emotion is chosen in priority order
        fear, anger, joy, sadness, and its score is the share of distinct
        words that hit the chosen lexicon, scaled by 5 and capped at 1.0.
        """
        words = self._tokenize(text)
        neg = len(words & _NEGATIVE_WORDS)
        pos = len(words & _POSITIVE_WORDS)
        fear = len(words & _FEAR_WORDS)
        anger = len(words & _ANGER_WORDS)

        polarity_hits = pos + neg
        score = (pos - neg) / polarity_hits if polarity_hits else 0.0

        if score > 0.3:
            sentiment = "high positive" if score > 0.6 else "positive"
        elif score < -0.3:
            sentiment = "high negative" if score < -0.6 else "negative"
        else:
            sentiment = "neutral"

        if fear > anger:
            emotion, hits = "fear", fear
        elif anger > 0:
            emotion, hits = "anger", anger
        elif pos > neg:
            emotion, hits = "joy", pos
        elif neg > 0:
            emotion, hits = "sadness", neg
        else:
            emotion, hits = "neutral", 0
        # max(..., 1) guards the division for empty input.
        emotion_score = min(hits / max(len(words), 1) * 5, 1.0)

        return SentimentResult(sentiment, round(score, 3), emotion, round(emotion_score, 3), "lexicon")

    # ── Public entry point ────────────────────────────────────────────────────
    def analyze(self, text: str) -> "SentimentResult":
        """
        Analyze *text* with the transformer models, falling back to the lexicon.

        Returns a SentimentResult whose ``method`` is "transformer" when both
        pipelines ran successfully and "lexicon" otherwise. Inference errors
        are logged and never propagate to the caller.
        """
        self._load_models()
        if self._sentiment_pipe is not None and self._emotion_pipe is not None:
            snippet = text[: self._MAX_CHARS]
            try:
                # truncation/max_length enforce the limit in tokens.
                s_out = self._top_prediction(
                    self._sentiment_pipe(snippet, truncation=True, max_length=self._MAX_TOKENS)
                )
                e_out = self._top_prediction(
                    self._emotion_pipe(snippet, truncation=True, max_length=self._MAX_TOKENS)
                )
                sentiment, s_score = self._label_sentiment(s_out["label"], s_out["score"])
                emotion = e_out["label"].lower()
                emotion_score = e_out["score"]
                return SentimentResult(
                    sentiment, round(s_score, 3), emotion, round(emotion_score, 3), "transformer"
                )
            except Exception as e:
                logger.warning("Transformer inference error: %s — falling back to lexicon", e)
        return self._lexicon_analyze(text)