"""
services/sentiment.py
Sentiment model based on IndoBERT / RoBERTa-ID.
Torch is imported lazily so the module does not crash when the package is unavailable.
"""
import os
# Path to the locally fine-tuned checkpoint; preferred when it exists on disk.
LOCAL_MODEL_PATH = "model/final_model"
# Public Hugging Face checkpoint used when no local fine-tuned model is found.
FALLBACK_MODEL = "w11wo/indonesian-roberta-base-sentiment-classifier"
# ββ RULE-BASED FALLBACK ββ
_POS_KW = [
"bagus","baik","senang","suka","mantap","keren","hebat","oke","setuju",
"benar","sukses","berhasil","love","good","great","nice","best","amazing",
"excellent","wonderful","happy","glad","positif","mendukung","bangga",
"luar biasa","terima kasih","apresiasi","semangat","maju","berkembang",
]
_NEG_KW = [
"buruk","jelek","benci","kecewa","gagal","salah","rugi","marah","bohong",
"hoax","fitnah","jahat","tidak setuju","parah","malu","takut","bad",
"worst","terrible","hate","fail","wrong","poor","awful","negatif","tolak",
"menolak","turun","jatuh","hancur","krisis","masalah","bahaya","ancam",
]
def _rule_based(text: str) -> str:
lower = text.lower()
pos = sum(1 for k in _POS_KW if k in lower)
neg = sum(1 for k in _NEG_KW if k in lower)
if pos > neg: return "Positive"
if neg > pos: return "Negative"
return "Neutral"
# ββ MODEL LOADING ββ
def _load_model():
try:
import torch
from transformers import pipeline
path = LOCAL_MODEL_PATH if os.path.exists(LOCAL_MODEL_PATH) else FALLBACK_MODEL
label = "fine-tuned" if os.path.exists(LOCAL_MODEL_PATH) else "fallback RoBERTa-ID"
clf = pipeline(
"sentiment-analysis",
model=path,
device=-1,
truncation=True,
max_length=512,
)
print(f"β
Sentiment model loaded: {label}")
return clf
except ImportError:
print("β οΈ PyTorch tidak tersedia β rule-based fallback aktif")
return None
except Exception as e:
print(f"β Gagal load sentiment model: {e}")
return None
# Loaded once at import time; None means the rule-based keyword fallback is active.
classifier = _load_model()
# ββ LABEL NORMALIZATION ββ
def _normalize(label: str) -> str:
label = label.lower()
if "positive" in label or label == "label_2": return "Positive"
if "negative" in label or label == "label_0": return "Negative"
if "neutral" in label or label == "label_1": return "Neutral"
return "Neutral"
# --- Public API ---
def predict(texts: list) -> list:
    """Classify every text in *texts*; returns a list of label strings.

    Uses the loaded model when available, otherwise the keyword heuristic.
    A failed batch call degrades to per-item inference, and a failed item
    degrades further to the rule-based label.
    """
    if not texts:
        return []
    if classifier is None:
        # No model available -> keyword heuristic for everything.
        return [_rule_based(t) for t in texts]
    try:
        raw = classifier(texts, batch_size=8, truncation=True)
    except Exception as e:
        print(f"β predict() batch error: {e} β per-item fallback")
    else:
        return [_normalize(item["label"]) for item in raw]
    # Per-item fallback path: truncate each text and retry individually.
    labels = []
    for text in texts:
        try:
            single = classifier(text[:512], truncation=True)
            labels.append(_normalize(single[0]["label"]))
        except Exception:
            labels.append(_rule_based(text))
    return labels
def predict_single(text: str) -> str:
    """Convenience wrapper: classify one text and return its label string."""
    (label,) = predict([text])
    return label
def predict_with_score(texts: list) -> list:
    """Classify every text and return a list of dicts: {"label", "score"}.

    "score" is the model confidence in [0, 1], rounded to 4 decimals;
    the rule-based fallback always reports 0.5. Degrades from batch
    inference to per-item inference to the keyword heuristic, mirroring
    predict().
    """
    if not texts:
        return []
    if classifier is None:
        return [{"label": _rule_based(t), "score": 0.5} for t in texts]
    try:
        raw = classifier(texts, batch_size=8, truncation=True)
    except Exception as e:
        print(f"β predict_with_score() error: {e} β per-item fallback")
    else:
        return [
            {"label": _normalize(item["label"]), "score": round(float(item["score"]), 4)}
            for item in raw
        ]
    # Per-item fallback path: truncate each text and retry individually.
    scored = []
    for text in texts:
        try:
            top = classifier(text[:512], truncation=True)[0]
            scored.append({
                "label": _normalize(top["label"]),
                "score": round(float(top["score"]), 4),
            })
        except Exception:
            scored.append({"label": _rule_based(text), "score": 0.5})
    return scored