Spaces:
Sleeping
Sleeping
"""
services/sentiment.py

Sentiment model based on IndoBERT / RoBERTa-ID.
Torch is imported lazily so this module does not crash when the
package is not installed yet (a rule-based fallback is used instead).
"""
import os

# Path of a locally fine-tuned checkpoint; preferred when it exists on disk.
LOCAL_MODEL_PATH = "model/final_model"
# Hugging Face Hub model id used when no local checkpoint is found.
FALLBACK_MODEL = "w11wo/indonesian-roberta-base-sentiment-classifier"
| # ββ RULE-BASED FALLBACK ββ | |
| _POS_KW = [ | |
| "bagus","baik","senang","suka","mantap","keren","hebat","oke","setuju", | |
| "benar","sukses","berhasil","love","good","great","nice","best","amazing", | |
| "excellent","wonderful","happy","glad","positif","mendukung","bangga", | |
| "luar biasa","terima kasih","apresiasi","semangat","maju","berkembang", | |
| ] | |
| _NEG_KW = [ | |
| "buruk","jelek","benci","kecewa","gagal","salah","rugi","marah","bohong", | |
| "hoax","fitnah","jahat","tidak setuju","parah","malu","takut","bad", | |
| "worst","terrible","hate","fail","wrong","poor","awful","negatif","tolak", | |
| "menolak","turun","jatuh","hancur","krisis","masalah","bahaya","ancam", | |
| ] | |
| def _rule_based(text: str) -> str: | |
| lower = text.lower() | |
| pos = sum(1 for k in _POS_KW if k in lower) | |
| neg = sum(1 for k in _NEG_KW if k in lower) | |
| if pos > neg: return "Positive" | |
| if neg > pos: return "Negative" | |
| return "Neutral" | |
# ── MODEL LOADING ──
def _load_model():
    """Build a Hugging Face sentiment pipeline, or return None on failure.

    Prefers the local fine-tuned checkpoint at LOCAL_MODEL_PATH and falls
    back to the public Indonesian RoBERTa model otherwise. Returns None
    when torch/transformers are missing or loading fails, in which case
    callers fall back to the rule-based classifier.
    """
    try:
        # Lazy imports: the app must still boot when these are absent.
        import torch  # noqa: F401 — ensures the backend is importable
        from transformers import pipeline

        # Fix: check the filesystem once instead of twice (the original
        # called os.path.exists() for both `path` and `label`).
        use_local = os.path.exists(LOCAL_MODEL_PATH)
        path = LOCAL_MODEL_PATH if use_local else FALLBACK_MODEL
        label = "fine-tuned" if use_local else "fallback RoBERTa-ID"
        clf = pipeline(
            "sentiment-analysis",
            model=path,
            device=-1,        # CPU only
            truncation=True,
            max_length=512,   # model context limit
        )
        print(f"✅ Sentiment model loaded: {label}")
        return clf
    except ImportError:
        # Missing torch/transformers — run with the keyword heuristic.
        print("⚠️ PyTorch tidak tersedia — rule-based fallback aktif")
        return None
    except Exception as e:
        # Any other load error (bad checkpoint, no network, ...).
        print(f"❌ Gagal load sentiment model: {e}")
        return None


# Loaded once at import time; None means "use the rule-based fallback".
classifier = _load_model()
| # ββ LABEL NORMALIZATION ββ | |
| def _normalize(label: str) -> str: | |
| label = label.lower() | |
| if "positive" in label or label == "label_2": return "Positive" | |
| if "negative" in label or label == "label_0": return "Negative" | |
| if "neutral" in label or label == "label_1": return "Neutral" | |
| return "Neutral" | |
# ── PUBLIC API ──
def predict(texts: list) -> list:
    """Classify each text; return a list of label strings.

    Uses the ML pipeline when available, the keyword heuristic otherwise.
    A batch failure degrades to per-item inference, then to the heuristic.
    """
    if not texts:
        return []
    if classifier is None:
        # No model loaded — heuristic only.
        return [_rule_based(t) for t in texts]
    try:
        batch = classifier(texts, batch_size=8, truncation=True)
    except Exception as e:
        print(f"❌ predict() batch error: {e} → per-item fallback")
    else:
        return [_normalize(item["label"]) for item in batch]
    # Per-item retry: isolate whichever input broke the batch.
    labels = []
    for text in texts:
        try:
            single = classifier(text[:512], truncation=True)
            labels.append(_normalize(single[0]["label"]))
        except Exception:
            labels.append(_rule_based(text))
    return labels
def predict_single(text: str) -> str:
    """Convenience wrapper: classify one text and return its label."""
    (label,) = predict([text])
    return label
def predict_with_score(texts: list) -> list:
    """Classify each text; return a list of {"label": str, "score": float} dicts.

    score is the model confidence in [0, 1], rounded to 4 decimals; the
    rule-based fallback reports a flat 0.5.
    """
    if not texts:
        return []
    if classifier is None:
        # No model loaded — heuristic labels with a neutral confidence.
        return [{"label": _rule_based(t), "score": 0.5} for t in texts]
    try:
        batch = classifier(texts, batch_size=8, truncation=True)
    except Exception as e:
        print(f"❌ predict_with_score() error: {e} → per-item fallback")
    else:
        return [
            {
                "label": _normalize(item["label"]),
                "score": round(float(item["score"]), 4),
            }
            for item in batch
        ]
    # Per-item retry: isolate whichever input broke the batch.
    results = []
    for text in texts:
        try:
            single = classifier(text[:512], truncation=True)[0]
            results.append({
                "label": _normalize(single["label"]),
                "score": round(float(single["score"]), 4),
            })
        except Exception:
            results.append({"label": _rule_based(text), "score": 0.5})
    return results