Sentiment / services /sentiment.py
NzTama's picture
Initial clean deploy: Sentiment Analysis
fa8ff66
"""
sentiment.py – Sentiment analysis using IndoBERT / HuggingFace pipeline.
Model is loaded lazily (first call) to avoid crashing at import time.
"""
from __future__ import annotations

import functools
import os
import re
from typing import Optional
# ── Model configuration ────────────────────────────────────────────────────────
# Hub model id used when no local copy is available.
_HF_MODEL_ID = "taufiqdp/indonesian-sentiment"
# A local fine-tuned model is looked up in ../indoBERT-sentiment relative to
# this file; the loader falls back to the Hub id above when that directory is
# missing or empty.
_LOCAL_MODEL_DIR = os.path.join(
    os.path.dirname(__file__),
    "..",
    "indoBERT-sentiment",
)

# ── Lazy-loaded globals ────────────────────────────────────────────────────────
# Process-wide pipeline instance; stays None until _load_pipeline() builds it.
_pipeline: Optional[object] = None
def _load_pipeline():
    """Return the shared text-classification pipeline, creating it on first use.

    The pipeline is stored in the module-level ``_pipeline`` so later calls
    return the same object without reloading the model. ``torch`` and
    ``transformers`` are imported here rather than at module import time so
    that importing this module does not require them to load.

    Returns:
        The HuggingFace ``text-classification`` pipeline instance.
    """
    global _pipeline
    if _pipeline is None:
        import torch
        from transformers import pipeline as hf_pipeline

        # A non-empty local directory wins over the Hub id (avoids repeated
        # downloads, e.g. inside Docker).
        has_local_model = os.path.isdir(_LOCAL_MODEL_DIR) and bool(
            os.listdir(_LOCAL_MODEL_DIR)
        )
        if not has_local_model:
            model_source = _HF_MODEL_ID
            print(f"[Sentiment] Local model not found. Downloading from HuggingFace: {model_source}")
        else:
            model_source = _LOCAL_MODEL_DIR
            print(f"[Sentiment] Loading model from local dir: {model_source}")

        pipeline_options = {
            "model": model_source,
            "tokenizer": model_source,
            # First CUDA device when available, otherwise -1 (CPU).
            "device": 0 if torch.cuda.is_available() else -1,
            "truncation": True,
            "max_length": 256,
            "return_all_scores": False,
        }
        _pipeline = hf_pipeline("text-classification", **pipeline_options)
        print("[Sentiment] Model loaded successfully.")
    return _pipeline
# ── Helpers ────────────────────────────────────────────────────────────────────
@functools.lru_cache(maxsize=1)
def _hub_id2label() -> dict:
    """Return the ``id2label`` mapping from the Hub model's config.

    The result is cached so the config is loaded at most once per process
    instead of once per prediction. Exceptions propagate to the caller and
    are not cached, so a transient failure is retried on the next call.
    """
    from transformers import AutoConfig

    return dict(AutoConfig.from_pretrained(_HF_MODEL_ID).id2label)


def _normalize_label(lbl: str) -> str:
    """Normalise a raw model label to 'positif', 'negatif', or 'netral'.

    Matching is case-insensitive and ignores surrounding whitespace.
    Placeholder labels of the form ``LABEL_<n>`` are resolved through the
    Hub model's ``id2label`` mapping. Anything that cannot be resolved maps
    to 'netral'.

    Args:
        lbl: Label string as emitted by the classification pipeline.

    Returns:
        One of 'positif', 'negatif', 'netral'.
    """
    label = lbl.strip().lower()
    if label in ("positif", "positive", "pos"):
        return "positif"
    if label in ("negatif", "negative", "neg"):
        return "negatif"
    if label in ("netral", "neutral", "neu"):
        return "netral"
    if "label_" not in label:
        return "netral"

    # Best-effort resolution of "LABEL_<n>": the index is parsed first so a
    # malformed label never triggers a config load, and any failure (missing
    # transformers, offline Hub, unknown index) degrades to 'netral'.
    try:
        idx = int(label.split("_")[-1])
        resolved = str(_hub_id2label()[idx])
    except Exception:
        return "netral"

    # A config without real class names maps n -> "LABEL_n"; recursing on
    # that would never terminate, so treat it as unresolved.
    if "label_" in resolved.lower():
        return "netral"
    return _normalize_label(resolved)
# ── Keywords Override ──────────────────────────────────────────────────────────
_NEGATIVE_KEYWORDS = {
    "bego", "bodoh", "jelek", "goblok", "bangsat", "kampungan", "tolol",
    "kontol", "kirik", "koplok", "anjing", "babi", "monyet", "belegug",
    "kik", "goblog", "kntl", "buruk", "lemah", "rendah", "gagal", "hancur",
    "rusak", "cacat", "jahat", "dusta", "bohong", "fitnah", "korup", "curang",
    "palsu", "salah", "sesat", "kejam", "dendam", "malas", "lambat", "menyakitkan",
    "tercela", "merugikan", "menghina", "melecehkan", "menyesatkan",
}
_POSITIVE_KEYWORDS = {
    "bagus", "hebat", "mantap", "luar biasa", "keren", "canggih",
    "cerdas", "pintar", "senang", "bahagia", "memuaskan", "unggul",
    "sempurna", "berhasil", "luas", "indah",
}
_NEUTRAL_KEYWORDS = {
    "ok", "oke", "biasa", "lumayan", "standar", "normal", "cukup", "agak",
}


def _compile_keyword_pattern(keywords) -> re.Pattern:
    """Build one regex that matches any keyword or phrase as a whole word."""
    # Deterministic order, longest first, so phrases are tried before the
    # shorter words they contain.
    ordered = sorted(keywords, key=lambda kw: (-len(kw), kw))
    alternatives = "|".join(re.escape(kw) for kw in ordered)
    # Lookarounds instead of a plain substring test: "ok" must not fire
    # inside "rokok", nor "salah" inside "masalah".
    return re.compile(rf"(?<!\w)(?:{alternatives})(?!\w)")


# Evaluated in order; the first category with a hit wins
# (negative > positive > neutral).
_KEYWORD_RULES = (
    ("negatif", _compile_keyword_pattern(_NEGATIVE_KEYWORDS)),
    ("positif", _compile_keyword_pattern(_POSITIVE_KEYWORDS)),
    ("netral", _compile_keyword_pattern(_NEUTRAL_KEYWORDS)),
)


def _override_label(text: str, model_label: str) -> str:
    """Override the model's label when the text contains a known keyword.

    Keywords are matched case-insensitively as whole words (or whole
    phrases, e.g. "luar biasa"). Negative keywords take precedence over
    positive ones, which take precedence over neutral ones. Affixed forms
    such as "jeleknya" are not matched and fall through to the model label.

    Args:
        text: The text that was classified.
        model_label: Normalised label predicted by the model.

    Returns:
        'negatif', 'positif' or 'netral' when a keyword matches, otherwise
        ``model_label`` unchanged.
    """
    text_lower = text.lower()
    for label, pattern in _KEYWORD_RULES:
        if pattern.search(text_lower):
            return label
    return model_label
# ── Public API ─────────────────────────────────────────────────────────────────
def analyze_sentiment(texts: list) -> dict:
    """Run sentiment analysis on a list of text strings.

    Entries that are not strings (``None``, NaN floats, numbers, ...) or
    that are blank are skipped instead of raising. The model pipeline is
    only loaded when at least one usable text remains.

    Args:
        texts: list of pre-processed strings.

    Returns:
        dict with keys: positif, negatif, netral, total, detail.
        ``total`` is the number of texts that were actually analysed.
        Each ``detail`` entry holds the text (first 200 characters), the
        final label after the keyword override, and the model's score for
        the label it predicted (the score is not adjusted when the keyword
        override changes the label).
        If prediction fails, the error is printed and the result has zero
        counts, an empty ``detail`` and ``total`` set to the number of
        texts that were submitted to the model.

    Example:
        {
            "positif": 12, "negatif": 4, "netral": 6, "total": 22,
            "detail": [{"text": "...", "label": "positif", "score": 0.95}, ...]
        }
    """
    # Keep only real, non-blank strings; `texts or []` also covers None.
    cleaned = [t for t in (texts or []) if isinstance(t, str) and t.strip()]
    if not cleaned:
        return {"positif": 0, "negatif": 0, "netral": 0, "total": 0, "detail": []}

    clf = _load_pipeline()
    try:
        preds = clf(cleaned, batch_size=16, truncation=True)
    except Exception as e:
        # Best-effort: report the failure and hand back an empty breakdown.
        print(f"[Sentiment] Prediction error: {e}")
        return {"positif": 0, "negatif": 0, "netral": 0, "total": len(cleaned), "detail": []}

    counts = {"positif": 0, "negatif": 0, "netral": 0}
    detail = []
    for text, pred in zip(cleaned, preds):
        model_label = _normalize_label(pred["label"])
        final_label = _override_label(text, model_label)
        counts[final_label] += 1
        detail.append({
            "text": text[:200],
            "label": final_label,
            "score": round(float(pred["score"]), 4),
        })

    return {
        "positif": counts["positif"],
        "negatif": counts["negatif"],
        "netral": counts["netral"],
        "total": len(cleaned),
        "detail": detail,
    }