""" sentiment.py – Sentiment analysis using IndoBERT / HuggingFace pipeline. Model is loaded lazily (first call) to avoid crashing at import time. """ from __future__ import annotations import os from typing import Optional # ── Model configuration ──────────────────────────────────────────────────────── # If you have a local fine-tuned model, place it in ./indoBERT-sentiment # and set MODEL_DIR. Otherwise it downloads from HuggingFace. _LOCAL_MODEL_DIR = os.path.join(os.path.dirname(__file__), "..", "indoBERT-sentiment") _HF_MODEL_ID = "taufiqdp/indonesian-sentiment" # ── Lazy-loaded globals ──────────────────────────────────────────────────────── _pipeline: Optional[object] = None def _load_pipeline(): global _pipeline if _pipeline is not None: return _pipeline import torch from transformers import pipeline as hf_pipeline # Prefer local model if it exists (avoids repeated downloads in Docker) if os.path.isdir(_LOCAL_MODEL_DIR) and os.listdir(_LOCAL_MODEL_DIR): model_source = _LOCAL_MODEL_DIR print(f"[Sentiment] Loading model from local dir: {model_source}") else: model_source = _HF_MODEL_ID print(f"[Sentiment] Local model not found. Downloading from HuggingFace: {model_source}") device = 0 if torch.cuda.is_available() else -1 _pipeline = hf_pipeline( "text-classification", model=model_source, tokenizer=model_source, device=device, truncation=True, max_length=256, return_all_scores=False, ) print("[Sentiment] Model loaded successfully.") return _pipeline # ── Helpers ──────────────────────────────────────────────────────────────────── def _normalize_label(lbl: str) -> str: """Normalise raw model label to 'positif', 'negatif', or 'netral'.""" l = lbl.lower() if l in ("positif", "positive", "pos"): return "positif" if l in ("negatif", "negative", "neg"): return "negatif" if l in ("netral", "neutral", "neu"): return "netral" if "label_" in l: try: from transformers import AutoConfig cfg = AutoConfig.from_pretrained(_HF_MODEL_ID) idx = int(l.split("_")[-1]) return _normalize_label(cfg.id2label[idx]) except Exception: return "netral" return "netral" # ── Keywords Override ────────────────────────────────────────────────────────── _NEGATIVE_KEYWORDS = { "bego", "bodoh", "jelek", "goblok", "bangsat", "kampungan", "tolol", "kontol", "kirik", "koplok", "anjing", "babi", "monyet", "belegug", "kik", "goblog", "kntl", "buruk", "lemah", "rendah", "gagal", "hancur", "rusak", "cacat", "jahat", "dusta", "bohong", "fitnah", "korup", "curang", "palsu", "salah", "sesat", "kejam", "dendam", "malas", "lambat", "menyakitkan", "tercela", "merugikan", "menghina", "melecehkan", "menyesatkan" } _POSITIVE_KEYWORDS = { "bagus", "hebat", "mantap", "luar biasa", "keren", "canggih", "cerdas", "pintar", "senang", "bahagia", "memuaskan", "unggul", "sempurna", "berhasil", "luas", "indah" } _NEUTRAL_KEYWORDS = { "ok", "oke", "biasa", "lumayan", "standar", "normal", "cukup", "agak" } def _override_label(text: str, model_label: str) -> str: text_lower = text.lower() if any(w in text_lower for w in _NEGATIVE_KEYWORDS): return "negatif" if any(w in text_lower for w in _POSITIVE_KEYWORDS): return "positif" if any(w in text_lower for w in _NEUTRAL_KEYWORDS): return "netral" return model_label # ── Public API ───────────────────────────────────────────────────────────────── def analyze_sentiment(texts: list) -> dict: """ Run sentiment analysis on a list of text strings. Args: texts: list of pre-processed strings Returns: dict with keys: positif, negatif, netral, total, detail Example: { "positif": 12, "negatif": 4, "netral": 6, "total": 22, "detail": [{"text": "...", "label": "positif", "score": 0.95}, ...] } """ if not texts: return {"positif": 0, "negatif": 0, "netral": 0, "total": 0, "detail": []} # Filter out empty strings texts = [t for t in texts if t and t.strip()] if not texts: return {"positif": 0, "negatif": 0, "netral": 0, "total": 0, "detail": []} clf = _load_pipeline() try: preds = clf(texts, batch_size=16, truncation=True) except Exception as e: print(f"[Sentiment] Prediction error: {e}") return {"positif": 0, "negatif": 0, "netral": 0, "total": len(texts), "detail": []} counts = {"positif": 0, "negatif": 0, "netral": 0} detail = [] for text, pred in zip(texts, preds): model_label = _normalize_label(pred["label"]) final_label = _override_label(text, model_label) counts[final_label] += 1 detail.append({ "text": text[:200], "label": final_label, "score": round(float(pred["score"]), 4), }) return { "positif": counts["positif"], "negatif": counts["negatif"], "netral": counts["netral"], "total": len(texts), "detail": detail, }