noranisa committed on
Commit
d4bf77c
·
verified ·
1 Parent(s): bb0c650

Update services/sentiment.py

Browse files
Files changed (1) hide show
  1. services/sentiment.py +75 -66
services/sentiment.py CHANGED
@@ -1,118 +1,127 @@
 
 
 
 
 
 
1
  import os
2
 
3
- # ── PATH MODEL FINE-TUNING ──
4
  LOCAL_MODEL_PATH = "model/final_model"
 
5
 
6
- # ── FALLBACK: model pretrained HuggingFace ──
7
- FALLBACK_MODEL = "w11wo/indonesian-roberta-base-sentiment-classifier"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
- def load_model():
11
- """
12
- Load pipeline sentimen. Urutan prioritas:
13
- 1. Model fine-tuned lokal (jika ada)
14
- 2. Model pretrained dari HuggingFace Hub
15
- 3. None → fallback ke rule-based
16
- """
17
  try:
18
- # import di dalam fungsi agar tidak crash saat torch tidak tersedia
19
  import torch
20
  from transformers import pipeline
21
 
22
- model_path = LOCAL_MODEL_PATH if os.path.exists(LOCAL_MODEL_PATH) else FALLBACK_MODEL
23
- label = "fine-tuned" if os.path.exists(LOCAL_MODEL_PATH) else "fallback RoBERTa"
24
 
25
  clf = pipeline(
26
  "sentiment-analysis",
27
- model=model_path,
28
- device=-1, # CPU-only (HF Spaces free tier)
29
  truncation=True,
30
  max_length=512,
31
  )
32
- print(f"✅ Model loaded: {label}")
33
  return clf
34
 
35
  except ImportError:
36
- print("⚠️ PyTorch tidak tersedia — menggunakan rule-based fallback")
37
  return None
38
  except Exception as e:
39
- print(f"❌ Gagal load model: {e}")
40
  return None
41
 
42
 
43
- # Load sekali saat startup
44
- classifier = load_model()
45
 
46
 
47
- # ── NORMALISASI LABEL ──
48
- def normalize_label(label: str) -> str:
49
  label = label.lower()
50
- if "positive" in label or label == "label_2":
51
- return "Positive"
52
- if "negative" in label or label == "label_0":
53
- return "Negative"
54
- if "neutral" in label or label == "label_1":
55
- return "Neutral"
56
- return "Neutral"
57
-
58
-
59
- # ── RULE-BASED FALLBACK ──
60
- POS_KW = ["bagus","baik","senang","suka","mantap","keren","hebat","oke","setuju",
61
- "benar","bagus","sukses","berhasil","love","good","great","nice","best",
62
- "amazing","excellent","wonderful","happy","glad"]
63
- NEG_KW = ["buruk","jelek","benci","kecewa","gagal","salah","rugi","marah","bohong",
64
- "hoax","fitnah","jahat","tidak setuju","parah","malu","takut",
65
- "bad","worst","terrible","hate","fail","wrong","poor","awful"]
66
-
67
- def rule_based(text: str) -> str:
68
- lower = text.lower()
69
- pos = sum(1 for k in POS_KW if k in lower)
70
- neg = sum(1 for k in NEG_KW if k in lower)
71
- if pos > neg:
72
- return "Positive"
73
- if neg > pos:
74
- return "Negative"
75
  return "Neutral"
76
 
77
 
78
- # ── PREDIKSI UTAMA ──
79
  def predict(texts: list) -> list:
80
- if not texts:
81
- return []
82
-
83
  if classifier is None:
84
- print("⚠️ Classifier tidak tersedia → rule-based")
85
- return [rule_based(t) for t in texts]
86
-
87
  try:
88
  outputs = classifier(texts, batch_size=8, truncation=True)
89
- return [normalize_label(o["label"]) for o in outputs]
90
  except Exception as e:
91
- print(f"❌ Error saat prediksi batch: {e}")
92
- # per-item fallback
93
  results = []
94
  for t in texts:
95
  try:
96
  out = classifier(t[:512], truncation=True)
97
- results.append(normalize_label(out[0]["label"]))
98
  except Exception:
99
- results.append(rule_based(t))
100
  return results
101
 
102
 
103
- # ── PREDICT SINGLE ──
104
  def predict_single(text: str) -> str:
105
  return predict([text])[0]
106
 
107
 
108
- # ── PREDICT WITH SCORE ──
109
  def predict_with_score(texts: list) -> list:
 
 
 
 
 
 
110
  if classifier is None:
111
- return [{"label": rule_based(t), "score": 0.0} for t in texts]
 
112
  try:
113
  outputs = classifier(texts, batch_size=8, truncation=True)
114
- return [{"label": normalize_label(o["label"]), "score": round(o["score"], 4)}
115
- for o in outputs]
 
 
116
  except Exception as e:
117
- print(f"❌ Error predict_with_score: {e}")
118
- return [{"label": rule_based(t), "score": 0.0} for t in texts]
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ services/sentiment.py
3
+ Model sentimen berbasis IndoBERT / RoBERTa-ID.
4
+ Torch di-import secara lazy agar tidak crash saat package belum siap.
5
+ """
6
+
7
  import os
8
 
 
9
  LOCAL_MODEL_PATH = "model/final_model"
10
+ FALLBACK_MODEL = "w11wo/indonesian-roberta-base-sentiment-classifier"
11
 
12
+ # ── RULE-BASED FALLBACK ──
13
+ _POS_KW = [
14
+ "bagus","baik","senang","suka","mantap","keren","hebat","oke","setuju",
15
+ "benar","sukses","berhasil","love","good","great","nice","best","amazing",
16
+ "excellent","wonderful","happy","glad","positif","mendukung","bangga",
17
+ "luar biasa","terima kasih","apresiasi","semangat","maju","berkembang",
18
+ ]
19
+ _NEG_KW = [
20
+ "buruk","jelek","benci","kecewa","gagal","salah","rugi","marah","bohong",
21
+ "hoax","fitnah","jahat","tidak setuju","parah","malu","takut","bad",
22
+ "worst","terrible","hate","fail","wrong","poor","awful","negatif","tolak",
23
+ "menolak","turun","jatuh","hancur","krisis","masalah","bahaya","ancam",
24
+ ]
25
+
26
+ def _rule_based(text: str) -> str:
27
+ lower = text.lower()
28
+ pos = sum(1 for k in _POS_KW if k in lower)
29
+ neg = sum(1 for k in _NEG_KW if k in lower)
30
+ if pos > neg: return "Positive"
31
+ if neg > pos: return "Negative"
32
+ return "Neutral"
33
 
34
 
35
+ # ── MODEL LOADING ──
36
+ def _load_model():
 
 
 
 
 
37
  try:
 
38
  import torch
39
  from transformers import pipeline
40
 
41
+ path = LOCAL_MODEL_PATH if os.path.exists(LOCAL_MODEL_PATH) else FALLBACK_MODEL
42
+ label = "fine-tuned" if os.path.exists(LOCAL_MODEL_PATH) else "fallback RoBERTa-ID"
43
 
44
  clf = pipeline(
45
  "sentiment-analysis",
46
+ model=path,
47
+ device=-1,
48
  truncation=True,
49
  max_length=512,
50
  )
51
+ print(f"✅ Sentiment model loaded: {label}")
52
  return clf
53
 
54
  except ImportError:
55
+ print("⚠️ PyTorch tidak tersedia — rule-based fallback aktif")
56
  return None
57
  except Exception as e:
58
+ print(f"❌ Gagal load sentiment model: {e}")
59
  return None
60
 
61
 
62
+ classifier = _load_model()
 
63
 
64
 
65
+ # ── LABEL NORMALIZATION ──
66
+ def _normalize(label: str) -> str:
67
  label = label.lower()
68
+ if "positive" in label or label == "label_2": return "Positive"
69
+ if "negative" in label or label == "label_0": return "Negative"
70
+ if "neutral" in label or label == "label_1": return "Neutral"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  return "Neutral"
72
 
73
 
74
+ # ── PUBLIC API ──
75
  def predict(texts: list) -> list:
76
+ """Return list of label strings."""
77
+ if not texts: return []
 
78
  if classifier is None:
79
+ return [_rule_based(t) for t in texts]
 
 
80
  try:
81
  outputs = classifier(texts, batch_size=8, truncation=True)
82
+ return [_normalize(o["label"]) for o in outputs]
83
  except Exception as e:
84
+ print(f"❌ predict() batch error: {e} — per-item fallback")
 
85
  results = []
86
  for t in texts:
87
  try:
88
  out = classifier(t[:512], truncation=True)
89
+ results.append(_normalize(out[0]["label"]))
90
  except Exception:
91
+ results.append(_rule_based(t))
92
  return results
93
 
94
 
 
95
  def predict_single(text: str) -> str:
96
  return predict([text])[0]
97
 
98
 
 
99
  def predict_with_score(texts: list) -> list:
100
+ """
101
+ Return list of dicts: {label, score}
102
+ score = confidence dari model (0–1).
103
+ """
104
+ if not texts: return []
105
+
106
  if classifier is None:
107
+ return [{"label": _rule_based(t), "score": 0.5} for t in texts]
108
+
109
  try:
110
  outputs = classifier(texts, batch_size=8, truncation=True)
111
+ return [
112
+ {"label": _normalize(o["label"]), "score": round(float(o["score"]), 4)}
113
+ for o in outputs
114
+ ]
115
  except Exception as e:
116
+ print(f"❌ predict_with_score() error: {e} — per-item fallback")
117
+ results = []
118
+ for t in texts:
119
+ try:
120
+ out = classifier(t[:512], truncation=True)
121
+ results.append({
122
+ "label": _normalize(out[0]["label"]),
123
+ "score": round(float(out[0]["score"]), 4)
124
+ })
125
+ except Exception:
126
+ results.append({"label": _rule_based(t), "score": 0.5})
127
+ return results