Spaces:

NzTama
/

Sentiment

Runtime error

App Files Files Community

Sentiment / services /sentiment.py

NzTama

Initial clean deploy: Sentiment Analysis

fa8ff66 2 months ago

raw

history blame contribute delete

5.91 kB

	"""
	sentiment.py – Sentiment analysis using IndoBERT / HuggingFace pipeline.
	Model is loaded lazily (first call) to avoid crashing at import time.
	"""
	from __future__ import annotations

	import os
	from typing import Optional

	# ── Model configuration ────────────────────────────────────────────────────────
	# _load_pipeline() uses the checkpoint in the "indoBERT-sentiment" directory one
	# level above this file's directory when that directory exists and is non-empty;
	# otherwise it falls back to the HuggingFace model ID below.
	_HF_MODEL_ID = "taufiqdp/indonesian-sentiment"
	_LOCAL_MODEL_DIR = os.path.join(
	    os.path.dirname(__file__),
	    os.pardir,
	    "indoBERT-sentiment",
	)

	# ── Lazy-loaded globals ────────────────────────────────────────────────────────
	# Stays None until _load_pipeline() builds the pipeline and stores it here.
	_pipeline: Optional[object] = None


	def _load_pipeline():
	    """
	    Return the shared text-classification pipeline, building it on first use.

	    ``torch`` and ``transformers`` are imported inside this function rather
	    than at module level, so importing this module does not depend on them.
	    The model source is the local directory ``_LOCAL_MODEL_DIR`` when it
	    exists and is non-empty, otherwise the HuggingFace ID ``_HF_MODEL_ID``.

	    Returns:
	        The pipeline object, also stored in the module-level ``_pipeline``
	        so later calls return it without reloading.
	    """
	    global _pipeline
	    if _pipeline is not None:
	        return _pipeline

	    import torch
	    from transformers import pipeline as hf_pipeline

	    # Prefer local model if it exists (avoids repeated downloads in Docker)
	    has_local_model = os.path.isdir(_LOCAL_MODEL_DIR) and bool(os.listdir(_LOCAL_MODEL_DIR))
	    if has_local_model:
	        model_source = _LOCAL_MODEL_DIR
	        print(f"[Sentiment] Loading model from local dir: {model_source}")
	    else:
	        model_source = _HF_MODEL_ID
	        print(f"[Sentiment] Local model not found. Downloading from HuggingFace: {model_source}")

	    # First CUDA device when available, otherwise CPU (-1).
	    device = 0 if torch.cuda.is_available() else -1

	    # NOTE: the deprecated ``return_all_scores=False`` argument is intentionally
	    # not passed. The pipeline's default already yields a single
	    # {"label", "score"} dict per input, which is the shape analyze_sentiment()
	    # consumes. Do not replace it with ``top_k=1``: that changes the output to
	    # a list of dicts per input.
	    _pipeline = hf_pipeline(
	        "text-classification",
	        model=model_source,
	        tokenizer=model_source,
	        device=device,
	        truncation=True,
	        max_length=256,
	    )
	    print("[Sentiment] Model loaded successfully.")
	    return _pipeline


	# ── Helpers ────────────────────────────────────────────────────────────────────

	# Holds the id -> label-name table of the HuggingFace model config under the
	# key "table" once it has been requested, so the config is loaded at most once.
	_ID2LABEL_CACHE: dict = {}

	# Accepted spellings (lower-case) mapped to the canonical class name.
	_LABEL_ALIASES = {
	    "positif": "positif",
	    "positive": "positif",
	    "pos": "positif",
	    "negatif": "negatif",
	    "negative": "negatif",
	    "neg": "negatif",
	    "netral": "netral",
	    "neutral": "netral",
	    "neu": "netral",
	}


	def _lookup_id2label(idx: int) -> Optional[str]:
	    """Return the config label name for class index *idx*, or None if unknown."""
	    if "table" not in _ID2LABEL_CACHE:
	        try:
	            from transformers import AutoConfig

	            cfg = AutoConfig.from_pretrained(_HF_MODEL_ID)
	            _ID2LABEL_CACHE["table"] = {
	                int(key): str(value) for key, value in cfg.id2label.items()
	            }
	        except Exception:
	            # Best effort: an unavailable config must not break classification.
	            # The empty table is cached too, so a failing lookup is not
	            # repeated for every single prediction.
	            _ID2LABEL_CACHE["table"] = {}
	    return _ID2LABEL_CACHE["table"].get(idx)


	def _normalize_label(lbl: str) -> str:
	    """
	    Normalise raw model label to 'positif', 'negatif', or 'netral'.

	    Args:
	        lbl: label string as emitted by the model; matching ignores case and
	            surrounding whitespace.

	    Returns:
	        'positif', 'negatif' or 'netral'. Generic labels containing
	        'label_<n>' are resolved through the ``id2label`` table of the
	        ``_HF_MODEL_ID`` config; anything that cannot be resolved is 'netral'.
	    """
	    name = lbl.strip().lower()

	    canonical = _LABEL_ALIASES.get(name)
	    if canonical is not None:
	        return canonical

	    if "label_" in name:
	        try:
	            idx = int(name.rsplit("_", 1)[-1])
	        except ValueError:
	            return "netral"
	        resolved = _lookup_id2label(idx)
	        if resolved is not None:
	            # Deliberately a single, non-recursive lookup: a config that maps
	            # n back to "LABEL_n" must end in 'netral' instead of looping.
	            return _LABEL_ALIASES.get(resolved.strip().lower(), "netral")

	    return "netral"


	# ── Keywords Override ──────────────────────────────────────────────────────────

	# Keyword sets consulted by _override_label(); entries are lower-case and kept
	# in alphabetical order. A text containing any entry is forced to that class.
	_NEGATIVE_KEYWORDS = {
	    "anjing", "babi", "bangsat", "bego", "belegug",
	    "bodoh", "bohong", "buruk",
	    "cacat", "curang",
	    "dendam", "dusta",
	    "fitnah",
	    "gagal", "goblog", "goblok",
	    "hancur",
	    "jahat", "jelek",
	    "kampungan", "kejam", "kik", "kirik", "kntl", "kontol", "koplok", "korup",
	    "lambat", "lemah",
	    "malas", "melecehkan", "menghina", "menyakitkan", "menyesatkan",
	    "merugikan", "monyet",
	    "palsu",
	    "rendah", "rusak",
	    "salah", "sesat",
	    "tercela", "tolol",
	}

	_POSITIVE_KEYWORDS = {
	    "bagus", "bahagia", "berhasil",
	    "canggih", "cerdas",
	    "hebat",
	    "indah",
	    "keren",
	    "luar biasa", "luas",
	    "mantap", "memuaskan",
	    "pintar",
	    "sempurna", "senang",
	    "unggul",
	}

	_NEUTRAL_KEYWORDS = {
	    "agak",
	    "biasa",
	    "cukup",
	    "lumayan",
	    "normal",
	    "ok", "oke",
	    "standar",
	}

	# Particles that may be attached directly to a keyword and still count as the
	# same word for matching purposes (e.g. "bagusnya", "jeleklah").
	_KEYWORD_SUFFIXES = ("nya", "lah", "kah")


	def _contains_keyword(text_lower: str, keywords) -> bool:
	    """Return True if *text_lower* holds any keyword as a whole word or phrase."""
	    if not keywords:
	        return False

	    import re  # function-scope import: ``re`` is not imported at module level

	    alternation = "|".join(re.escape(kw) for kw in sorted(keywords))
	    suffixes = "|".join(_KEYWORD_SUFFIXES)
	    # The lookbehind/lookahead keep a keyword from matching inside a longer,
	    # unrelated word ("salah" in "masalah", "indah" in "pindah", "ok" in "cocok").
	    pattern = rf"(?<!\w)(?:{alternation})(?:{suffixes})?(?!\w)"
	    return re.search(pattern, text_lower) is not None


	def _override_label(text: str, model_label: str) -> str:
	    """
	    Replace the model's label when the text contains a known keyword.

	    Keyword classes are checked in the order negative, positive, neutral and
	    the first class with a hit wins. Matching is case-insensitive and works
	    on whole words or phrases, optionally followed by one of
	    ``_KEYWORD_SUFFIXES``.

	    Args:
	        text: the text that was classified.
	        model_label: normalised label predicted by the model.

	    Returns:
	        'negatif', 'positif' or 'netral' when a keyword matched, otherwise
	        ``model_label`` unchanged.
	    """
	    text_lower = text.lower()

	    checks = (
	        (_NEGATIVE_KEYWORDS, "negatif"),
	        (_POSITIVE_KEYWORDS, "positif"),
	        (_NEUTRAL_KEYWORDS, "netral"),
	    )
	    for keywords, forced_label in checks:
	        if _contains_keyword(text_lower, keywords):
	            return forced_label

	    return model_label


	# ── Public API ─────────────────────────────────────────────────────────────────

	def analyze_sentiment(texts: list) -> dict:
	    """
	    Run sentiment analysis on a list of text strings.

	    Entries that are not strings, or that are empty / whitespace-only, are
	    ignored and do not count towards ``total``.

	    Args:
	        texts: list of pre-processed strings

	    Returns:
	        dict with keys: positif, negatif, netral, total, detail
	        Example:
	        {
	            "positif": 12, "negatif": 4, "netral": 6, "total": 22,
	            "detail": [{"text": "...", "label": "positif", "score": 0.95}, ...]
	        }
	        If the classifier call raises, the error is printed and the result
	        has zero counts and an empty ``detail``, with ``total`` set to the
	        number of texts that were submitted to the classifier.
	    """

	    def _result(counts: dict, total: int, detail: list) -> dict:
	        # Single place that defines the shape of the returned dict.
	        return {
	            "positif": counts["positif"],
	            "negatif": counts["negatif"],
	            "netral": counts["netral"],
	            "total": total,
	            "detail": detail,
	        }

	    counts = {"positif": 0, "negatif": 0, "netral": 0}

	    if not texts:
	        return _result(counts, 0, [])

	    # Keep only real, non-blank strings. The isinstance check stops stray
	    # values (numbers, NaN, ...) from crashing on .strip().
	    usable = [t for t in texts if isinstance(t, str) and t.strip()]
	    if not usable:
	        return _result(counts, 0, [])

	    clf = _load_pipeline()

	    try:
	        preds = clf(usable, batch_size=16, truncation=True)
	    except Exception as e:
	        print(f"[Sentiment] Prediction error: {e}")
	        return _result(counts, len(usable), [])

	    detail = []
	    for text, pred in zip(usable, preds):
	        model_label = _normalize_label(pred["label"])
	        final_label = _override_label(text, model_label)

	        counts[final_label] += 1
	        detail.append({
	            "text": text[:200],  # only the first 200 characters are echoed back
	            "label": final_label,
	            # The score is always the model's own score, even when the label
	            # was replaced by a keyword override.
	            "score": round(float(pred["score"]), 4),
	        })

	    return _result(counts, len(usable), detail)