Darendra committed on
Commit
dafa625
Β·
verified Β·
1 Parent(s): 74dd21d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -15
app.py CHANGED
@@ -28,6 +28,7 @@ ACTIVE_MODEL_POINTER = "active_model_path.txt"
28
  class ModelEmosi(nn.Module):
29
  def __init__(self, base_model_name, num_labels=8):
30
  super().__init__()
 
31
  self.config = AutoConfig.from_pretrained(base_model_name)
32
  self.base = AutoModel.from_pretrained(base_model_name)
33
  self.dropout = nn.Dropout(0.3)
@@ -38,6 +39,7 @@ class ModelEmosi(nn.Module):
38
  if hasattr(out, "pooler_output") and out.pooler_output is not None:
39
  x = out.pooler_output
40
  else:
 
41
  x = out.last_hidden_state[:, 0, :]
42
  return self.classifier(self.dropout(x))
43
 
@@ -45,9 +47,12 @@ class ModelEmosi(nn.Module):
45
  # 3. HELPER FUNCTIONS
46
  # =========================================================
47
  def clean_data(df):
 
48
  for l in LIST_LABEL:
49
  if l not in df.columns: df[l] = 0
50
  df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)
 
 
51
  if "text" in df.columns:
52
  df["text"] = df["text"].astype(str).str.replace("\n", " ").str.strip()
53
  return df
@@ -64,7 +69,7 @@ def set_active_model_path(path):
64
  f.write(str(path))
65
 
66
  # =========================================================
67
- # 4. LOGIKA TRAINING (CPU)
68
  # =========================================================
69
  def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Progress()):
70
  yield "⏳ Membaca dataset...", None
@@ -76,6 +81,7 @@ def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Pr
76
  return
77
 
78
  device = "cpu"
 
79
  model_name = "bert-base-multilingual-cased"
80
  tokenizer = AutoTokenizer.from_pretrained(model_name)
81
 
@@ -107,18 +113,20 @@ def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Pr
107
  optimizer.step()
108
  total_loss += loss.item()
109
 
 
110
  if step % 5 == 0:
111
  progress((ep * len(train_loader) + step) / (int(epochs) * len(train_loader)))
112
 
113
  avg_loss = total_loss / len(train_loader)
114
  log_text += f"βœ… Epoch {ep+1} | Loss: {avg_loss:.4f}\n"
115
  yield log_text, None
116
-
 
117
  model.base.save_pretrained(DIR_TRAINED)
118
  tokenizer.save_pretrained(DIR_TRAINED)
119
  torch.save(model.classifier.state_dict(), DIR_TRAINED / "classifier_head.pt")
120
- set_active_model_path(DIR_TRAINED)
121
 
 
122
  yield log_text + "\nπŸŽ‰ Selesai & Disimpan!", "Model Lokal (Baru Dilatih)"
123
 
124
  # =========================================================
@@ -133,7 +141,7 @@ def handle_zip_upload(file_obj):
133
  with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
134
  zip_ref.extractall(DIR_UPLOADED)
135
 
136
- # Handle jika ada subfolder
137
  files_in_dir = list(DIR_UPLOADED.iterdir())
138
  if len(files_in_dir) == 1 and files_in_dir[0].is_dir():
139
  subfolder = files_in_dir[0]
@@ -154,15 +162,14 @@ def load_model_inference():
154
  if not path: raise ValueError("Belum ada model aktif.")
155
 
156
  path = Path(path)
157
- config = AutoConfig.from_pretrained(path)
158
  tokenizer = AutoTokenizer.from_pretrained(path)
159
  model = ModelEmosi(path)
160
 
161
  head_path = path / "classifier_head.pt"
162
  if head_path.exists():
163
  model.classifier.load_state_dict(torch.load(head_path, map_location="cpu"))
 
164
 
165
- model.eval()
166
  return model, tokenizer
167
 
168
  def predict_text(text):
@@ -170,9 +177,11 @@ def predict_text(text):
170
  try:
171
  model, tokenizer = load_model_inference()
172
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
 
173
  with torch.no_grad():
174
  out = model(inputs["input_ids"], inputs["attention_mask"])
175
  probs = torch.sigmoid(out).numpy()[0]
 
176
  return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
177
  except Exception as e:
178
  return {"Error": str(e)}
@@ -182,6 +191,7 @@ def predict_csv(file_obj, sep):
182
  df = pd.read_csv(file_obj.name, sep=sep)
183
  df = clean_data(df)
184
  model, tokenizer = load_model_inference()
 
185
  results = []
186
  for txt in df["text"]:
187
  inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
@@ -190,21 +200,27 @@ def predict_csv(file_obj, sep):
190
  probs = torch.sigmoid(out).numpy()[0]
191
  results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
192
 
 
193
  avg = {l: 0.0 for l in LIST_LABEL}
194
  for r in results:
195
  for l,v in r.items(): avg[l] += v
196
  for l in avg: avg[l] /= len(results)
197
 
198
  top3 = sorted(avg.items(), key=lambda x: x[1], reverse=True)[:3]
199
- return {"Total Data": len(results), "Top 3 Emosi": {k: round(v,4) for k,v in top3}, "Rata-rata": avg}
 
 
 
 
 
200
  except Exception as e:
201
  return {"Error": str(e)}
202
 
203
  # =========================================================
204
- # 7. TAMPILAN ANTARMUKA (UI)
205
  # =========================================================
206
  with gr.Blocks(title="Emotion AI Manager") as app:
207
- gr.Markdown("# 🎭 AI Emotion Classifier System")
208
 
209
  # Status Bar Global
210
  lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model yang dipilih.", interactive=False)
@@ -213,9 +229,9 @@ with gr.Blocks(title="Emotion AI Manager") as app:
213
  with gr.Tab("βš™οΈ Pelatihan & Model"):
214
  with gr.Tabs():
215
 
216
- # Sub-Tab 1.1: Upload (Paling Recommended)
217
- with gr.Tab("πŸ“‚ Upload Pretrained (Recommended)"):
218
- gr.Markdown("Gunakan model hasil training GPU (Colab) agar cepat.")
219
  in_zip = gr.File(label="Upload File .zip Model", file_types=[".zip"])
220
  btn_upload = gr.Button("Ekstrak & Aktifkan Model", variant="primary")
221
  out_log_upload = gr.Textbox(label="Log Sistem")
@@ -223,8 +239,8 @@ with gr.Blocks(title="Emotion AI Manager") as app:
223
  btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
224
 
225
  # Sub-Tab 1.2: Latihan Manual
226
- with gr.Tab("πŸ‹οΈβ€β™€οΈ Latihan Manual (CPU)"):
227
- gr.Markdown("⚠️ Lambat di Hugging Face Space. Gunakan data kecil saja.")
228
  with gr.Row():
229
  in_csv = gr.File(label="Dataset CSV")
230
  in_sep = gr.Textbox(label="Separator", value=";")
@@ -239,7 +255,7 @@ with gr.Blocks(title="Emotion AI Manager") as app:
239
  btn_train.click(run_training_generator, inputs=[in_csv, in_sep, in_ep, in_bs, in_lr], outputs=[out_log_train, lbl_active_model])
240
 
241
  # TAB UTAMA 2: PENGUJIAN
242
- with gr.Tab("πŸ§ͺ Pengujian (Testing)"):
243
  with gr.Tabs():
244
 
245
  # Sub-Tab 2.1: Uji Tunggal
 
28
  class ModelEmosi(nn.Module):
29
  def __init__(self, base_model_name, num_labels=8):
30
  super().__init__()
31
+ # Load config agar fleksibel (bisa baca dari folder atau nama model)
32
  self.config = AutoConfig.from_pretrained(base_model_name)
33
  self.base = AutoModel.from_pretrained(base_model_name)
34
  self.dropout = nn.Dropout(0.3)
 
39
  if hasattr(out, "pooler_output") and out.pooler_output is not None:
40
  x = out.pooler_output
41
  else:
42
+ # Fallback jika model tidak punya pooler (misal DistilBERT)
43
  x = out.last_hidden_state[:, 0, :]
44
  return self.classifier(self.dropout(x))
45
 
 
47
  # 3. HELPER FUNCTIONS
48
  # =========================================================
49
  def clean_data(df):
50
+ # Pastikan kolom label ada dan bertipe float
51
  for l in LIST_LABEL:
52
  if l not in df.columns: df[l] = 0
53
  df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)
54
+
55
+ # Bersihkan teks
56
  if "text" in df.columns:
57
  df["text"] = df["text"].astype(str).str.replace("\n", " ").str.strip()
58
  return df
 
69
  f.write(str(path))
70
 
71
  # =========================================================
72
+ # 4. LOGIKA TRAINING (CPU - HANYA UNTUK DATA KECIL)
73
  # =========================================================
74
  def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Progress()):
75
  yield "⏳ Membaca dataset...", None
 
81
  return
82
 
83
  device = "cpu"
84
+ # Default model dasar untuk training manual di CPU
85
  model_name = "bert-base-multilingual-cased"
86
  tokenizer = AutoTokenizer.from_pretrained(model_name)
87
 
 
113
  optimizer.step()
114
  total_loss += loss.item()
115
 
116
+ # Update progress bar setiap 5 step
117
  if step % 5 == 0:
118
  progress((ep * len(train_loader) + step) / (int(epochs) * len(train_loader)))
119
 
120
  avg_loss = total_loss / len(train_loader)
121
  log_text += f"βœ… Epoch {ep+1} | Loss: {avg_loss:.4f}\n"
122
  yield log_text, None
123
+
124
+ # Simpan Model
125
  model.base.save_pretrained(DIR_TRAINED)
126
  tokenizer.save_pretrained(DIR_TRAINED)
127
  torch.save(model.classifier.state_dict(), DIR_TRAINED / "classifier_head.pt")
 
128
 
129
+ set_active_model_path(DIR_TRAINED)
130
  yield log_text + "\nπŸŽ‰ Selesai & Disimpan!", "Model Lokal (Baru Dilatih)"
131
 
132
  # =========================================================
 
141
  with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
142
  zip_ref.extractall(DIR_UPLOADED)
143
 
144
+ # Handle jika zip membungkus folder (bukan isi file langsung)
145
  files_in_dir = list(DIR_UPLOADED.iterdir())
146
  if len(files_in_dir) == 1 and files_in_dir[0].is_dir():
147
  subfolder = files_in_dir[0]
 
162
  if not path: raise ValueError("Belum ada model aktif.")
163
 
164
  path = Path(path)
 
165
  tokenizer = AutoTokenizer.from_pretrained(path)
166
  model = ModelEmosi(path)
167
 
168
  head_path = path / "classifier_head.pt"
169
  if head_path.exists():
170
  model.classifier.load_state_dict(torch.load(head_path, map_location="cpu"))
171
+ model.eval()
172
 
 
173
  return model, tokenizer
174
 
175
  def predict_text(text):
 
177
  try:
178
  model, tokenizer = load_model_inference()
179
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
180
+
181
  with torch.no_grad():
182
  out = model(inputs["input_ids"], inputs["attention_mask"])
183
  probs = torch.sigmoid(out).numpy()[0]
184
+
185
  return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
186
  except Exception as e:
187
  return {"Error": str(e)}
 
191
  df = pd.read_csv(file_obj.name, sep=sep)
192
  df = clean_data(df)
193
  model, tokenizer = load_model_inference()
194
+
195
  results = []
196
  for txt in df["text"]:
197
  inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
 
200
  probs = torch.sigmoid(out).numpy()[0]
201
  results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
202
 
203
+ # Hitung statistik
204
  avg = {l: 0.0 for l in LIST_LABEL}
205
  for r in results:
206
  for l,v in r.items(): avg[l] += v
207
  for l in avg: avg[l] /= len(results)
208
 
209
  top3 = sorted(avg.items(), key=lambda x: x[1], reverse=True)[:3]
210
+
211
+ return {
212
+ "Total Data": len(results),
213
+ "Top 3 Emosi Dominan": {k: round(v,4) for k,v in top3},
214
+ "Rata-rata Skor": avg
215
+ }
216
  except Exception as e:
217
  return {"Error": str(e)}
218
 
219
  # =========================================================
220
+ # 7. TAMPILAN ANTARMUKA (UI GRADIO)
221
  # =========================================================
222
  with gr.Blocks(title="Emotion AI Manager") as app:
223
+ gr.Markdown("#AI Emotion Classifier System")
224
 
225
  # Status Bar Global
226
  lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model yang dipilih.", interactive=False)
 
229
  with gr.Tab("βš™οΈ Pelatihan & Model"):
230
  with gr.Tabs():
231
 
232
+ # Sub-Tab 1.1: Upload Pretrained Model
233
+ with gr.Tab("πŸ“‚ Upload Pretrained Model"):
234
+ gr.Markdown("Sudah punya model terlatih? gunakan model hasil training model")
235
  in_zip = gr.File(label="Upload File .zip Model", file_types=[".zip"])
236
  btn_upload = gr.Button("Ekstrak & Aktifkan Model", variant="primary")
237
  out_log_upload = gr.Textbox(label="Log Sistem")
 
239
  btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
240
 
241
  # Sub-Tab 1.2: Latihan Manual
242
+ with gr.Tab("πŸ‹οΈβ€β™€οΈ Latihan Manual"):
243
+ gr.Markdown("Belum punya model? latih file csv [text;label emosi (1/0)]")
244
  with gr.Row():
245
  in_csv = gr.File(label="Dataset CSV")
246
  in_sep = gr.Textbox(label="Separator", value=";")
 
255
  btn_train.click(run_training_generator, inputs=[in_csv, in_sep, in_ep, in_bs, in_lr], outputs=[out_log_train, lbl_active_model])
256
 
257
  # TAB UTAMA 2: PENGUJIAN
258
+ with gr.Tab("πŸ§ͺ Testing"):
259
  with gr.Tabs():
260
 
261
  # Sub-Tab 2.1: Uji Tunggal