Darendra committed on
Commit
02adcda
Β·
verified Β·
1 Parent(s): dafa625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -172
app.py CHANGED
@@ -5,56 +5,51 @@ import numpy as np
5
  import gradio as gr
6
  import zipfile
7
  import shutil
 
8
  from pathlib import Path
9
  from torch import nn
10
  from torch.utils.data import DataLoader, TensorDataset
11
- from transformers import AutoTokenizer, AutoModel, AutoConfig
12
 
13
  # =========================================================
14
  # 1. KONFIGURASI & SETUP
15
  # =========================================================
16
- LIST_LABEL = ['anger','anticipation','disgust','fear','joy','sadness','surprise','trust']
17
- DIR_TRAINED = Path("saved_models/trained_local")
18
- DIR_UPLOADED = Path("saved_models/uploaded_colab")
19
 
20
- DIR_TRAINED.mkdir(parents=True, exist_ok=True)
21
- DIR_UPLOADED.mkdir(parents=True, exist_ok=True)
22
 
23
- ACTIVE_MODEL_POINTER = "active_model_path.txt"
 
 
 
 
 
24
 
25
- # =========================================================
26
- # 2. ARSITEKTUR MODEL
27
- # =========================================================
28
- class ModelEmosi(nn.Module):
29
- def __init__(self, base_model_name, num_labels=8):
30
- super().__init__()
31
- # Load config agar fleksibel (bisa baca dari folder atau nama model)
32
- self.config = AutoConfig.from_pretrained(base_model_name)
33
- self.base = AutoModel.from_pretrained(base_model_name)
34
- self.dropout = nn.Dropout(0.3)
35
- self.classifier = nn.Linear(self.config.hidden_size, num_labels)
36
 
37
- def forward(self, input_ids, attention_mask):
38
- out = self.base(input_ids=input_ids, attention_mask=attention_mask)
39
- if hasattr(out, "pooler_output") and out.pooler_output is not None:
40
- x = out.pooler_output
41
- else:
42
- # Fallback jika model tidak punya pooler (misal DistilBERT)
43
- x = out.last_hidden_state[:, 0, :]
44
- return self.classifier(self.dropout(x))
45
 
46
  # =========================================================
47
- # 3. HELPER FUNCTIONS
48
  # =========================================================
49
  def clean_data(df):
50
- # Pastikan kolom label ada dan bertipe float
51
  for l in LIST_LABEL:
52
  if l not in df.columns: df[l] = 0
 
 
53
  df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)
54
 
55
  # Bersihkan teks
56
- if "text" in df.columns:
57
- df["text"] = df["text"].astype(str).str.replace("\n", " ").str.strip()
 
 
 
 
58
  return df
59
 
60
  def get_active_model_path():
@@ -69,108 +64,48 @@ def set_active_model_path(path):
69
  f.write(str(path))
70
 
71
  # =========================================================
72
- # 4. LOGIKA TRAINING (CPU - HANYA UNTUK DATA KECIL)
73
- # =========================================================
74
- def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Progress()):
75
- yield "⏳ Membaca dataset...", None
76
- try:
77
- df = pd.read_csv(file_obj.name, sep=sep)
78
- df = clean_data(df)
79
- except Exception as e:
80
- yield f"❌ Error: {str(e)}", None
81
- return
82
-
83
- device = "cpu"
84
- # Default model dasar untuk training manual di CPU
85
- model_name = "bert-base-multilingual-cased"
86
- tokenizer = AutoTokenizer.from_pretrained(model_name)
87
-
88
- def tokenize_fn(texts):
89
- return tokenizer(texts, padding="max_length", truncation=True, max_length=128, return_tensors="pt")
90
-
91
- encodings = tokenize_fn(df["text"].tolist())
92
- labels = torch.tensor(df[LIST_LABEL].values, dtype=torch.float)
93
- dataset = TensorDataset(encodings["input_ids"], encodings["attention_mask"], labels)
94
- train_loader = DataLoader(dataset, batch_size=int(batch_size), shuffle=True)
95
-
96
- model = ModelEmosi(model_name)
97
- model.to(device)
98
- optimizer = torch.optim.AdamW(model.parameters(), lr=float(lr))
99
- loss_fn = nn.BCEWithLogitsLoss()
100
-
101
- log_text = f"πŸš€ Mulai Training CPU...\nData: {len(df)} baris\n"
102
- yield log_text, None
103
-
104
- model.train()
105
- for ep in range(int(epochs)):
106
- total_loss = 0
107
- for step, batch in enumerate(train_loader):
108
- b_ids, b_mask, b_lbl = batch
109
- optimizer.zero_grad()
110
- out = model(b_ids, b_mask)
111
- loss = loss_fn(out, b_lbl)
112
- loss.backward()
113
- optimizer.step()
114
- total_loss += loss.item()
115
-
116
- # Update progress bar setiap 5 step
117
- if step % 5 == 0:
118
- progress((ep * len(train_loader) + step) / (int(epochs) * len(train_loader)))
119
-
120
- avg_loss = total_loss / len(train_loader)
121
- log_text += f"βœ… Epoch {ep+1} | Loss: {avg_loss:.4f}\n"
122
- yield log_text, None
123
-
124
- # Simpan Model
125
- model.base.save_pretrained(DIR_TRAINED)
126
- tokenizer.save_pretrained(DIR_TRAINED)
127
- torch.save(model.classifier.state_dict(), DIR_TRAINED / "classifier_head.pt")
128
-
129
- set_active_model_path(DIR_TRAINED)
130
- yield log_text + "\nπŸŽ‰ Selesai & Disimpan!", "Model Lokal (Baru Dilatih)"
131
-
132
- # =========================================================
133
- # 5. LOGIKA UPLOAD (DARI COLAB)
134
  # =========================================================
135
  def handle_zip_upload(file_obj):
136
  if file_obj is None: return "❌ Tidak ada file.", None
137
  try:
 
138
  if DIR_UPLOADED.exists(): shutil.rmtree(DIR_UPLOADED)
139
- DIR_UPLOADED.mkdir()
140
 
141
  with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
142
  zip_ref.extractall(DIR_UPLOADED)
143
 
144
- # Handle jika zip membungkus folder (bukan isi file langsung)
145
- files_in_dir = list(DIR_UPLOADED.iterdir())
146
- if len(files_in_dir) == 1 and files_in_dir[0].is_dir():
147
- subfolder = files_in_dir[0]
148
- for item in subfolder.iterdir():
149
- shutil.move(str(item), str(DIR_UPLOADED))
150
- subfolder.rmdir()
151
-
152
- set_active_model_path(DIR_UPLOADED)
153
- return f"βœ… Model berhasil dimuat dari ZIP!\nLokasi: {DIR_UPLOADED}", "Model Upload (Dari Colab)"
 
 
154
  except Exception as e:
155
  return f"❌ Error unzip: {str(e)}", None
156
 
157
  # =========================================================
158
- # 6. LOGIKA PREDIKSI
159
  # =========================================================
160
  def load_model_inference():
161
  path = get_active_model_path()
162
- if not path: raise ValueError("Belum ada model aktif.")
163
 
164
  path = Path(path)
165
- tokenizer = AutoTokenizer.from_pretrained(path)
166
- model = ModelEmosi(path)
167
-
168
- head_path = path / "classifier_head.pt"
169
- if head_path.exists():
170
- model.classifier.load_state_dict(torch.load(head_path, map_location="cpu"))
171
  model.eval()
172
-
173
- return model, tokenizer
 
174
 
175
  def predict_text(text):
176
  if not text: return None
@@ -179,8 +114,8 @@ def predict_text(text):
179
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
180
 
181
  with torch.no_grad():
182
- out = model(inputs["input_ids"], inputs["attention_mask"])
183
- probs = torch.sigmoid(out).numpy()[0]
184
 
185
  return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
186
  except Exception as e:
@@ -188,16 +123,25 @@ def predict_text(text):
188
 
189
  def predict_csv(file_obj, sep):
190
  try:
191
- df = pd.read_csv(file_obj.name, sep=sep)
 
 
 
 
 
192
  df = clean_data(df)
193
  model, tokenizer = load_model_inference()
194
 
195
  results = []
196
- for txt in df["text"]:
 
 
 
197
  inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
198
  with torch.no_grad():
199
- out = model(inputs["input_ids"], inputs["attention_mask"])
200
- probs = torch.sigmoid(out).numpy()[0]
 
201
  results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
202
 
203
  # Hitung statistik
@@ -217,62 +161,43 @@ def predict_csv(file_obj, sep):
217
  return {"Error": str(e)}
218
 
219
  # =========================================================
220
- # 7. TAMPILAN ANTARMUKA (UI GRADIO)
221
  # =========================================================
222
  with gr.Blocks(title="Emotion AI Manager") as app:
223
- gr.Markdown("#AI Emotion Classifier System")
224
 
225
  # Status Bar Global
226
- lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model yang dipilih.", interactive=False)
227
-
228
- # TAB UTAMA 1: SETUP & PELATIHAN
229
- with gr.Tab("βš™οΈ Pelatihan & Model"):
230
- with gr.Tabs():
231
-
232
- # Sub-Tab 1.1: Upload Pretrained Model
233
- with gr.Tab("πŸ“‚ Upload Pretrained Model"):
234
- gr.Markdown("Sudah punya model terlatih? gunakan model hasil training model")
235
- in_zip = gr.File(label="Upload File .zip Model", file_types=[".zip"])
236
- btn_upload = gr.Button("Ekstrak & Aktifkan Model", variant="primary")
237
- out_log_upload = gr.Textbox(label="Log Sistem")
238
-
239
- btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
240
 
241
- # Sub-Tab 1.2: Latihan Manual
242
- with gr.Tab("πŸ‹οΈβ€β™€οΈ Latihan Manual"):
243
- gr.Markdown("Belum punya model? latih file csv [text;label emosi (1/0)]")
244
- with gr.Row():
245
- in_csv = gr.File(label="Dataset CSV")
246
- in_sep = gr.Textbox(label="Separator", value=";")
247
- with gr.Row():
248
- in_ep = gr.Number(label="Epoch", value=1)
249
- in_bs = gr.Number(label="Batch", value=4)
250
- in_lr = gr.Number(label="LR", value=2e-5)
251
-
252
- btn_train = gr.Button("Mulai Latihan")
253
- out_log_train = gr.Textbox(label="Log Training", lines=6)
254
-
255
- btn_train.click(run_training_generator, inputs=[in_csv, in_sep, in_ep, in_bs, in_lr], outputs=[out_log_train, lbl_active_model])
256
-
257
- # TAB UTAMA 2: PENGUJIAN
258
- with gr.Tab("πŸ§ͺ Testing"):
259
- with gr.Tabs():
260
-
261
- # Sub-Tab 2.1: Uji Tunggal
262
- with gr.Tab("πŸ“ Uji Tunggal (Teks)"):
263
- in_txt = gr.Textbox(label="Masukkan Kalimat", placeholder="Saya merasa...")
264
- btn_pred_txt = gr.Button("Prediksi Emosi")
265
- out_lbl = gr.Label(label="Confidence Score")
266
-
267
- btn_pred_txt.click(predict_text, inputs=in_txt, outputs=out_lbl)
268
-
269
- # Sub-Tab 2.2: Uji Batch
270
- with gr.Tab("πŸ“Š Uji Batch (CSV)"):
271
- in_csv_test = gr.File(label="Upload CSV Test")
272
- in_sep_test = gr.Textbox(label="Separator", value=";")
273
- btn_pred_csv = gr.Button("Analisis Batch")
274
- out_json = gr.JSON(label="Hasil Analisis")
275
 
276
- btn_pred_csv.click(predict_csv, inputs=[in_csv_test, in_sep_test], outputs=out_json)
277
-
278
- app.queue().launch()
 
 
 
 
 
 
 
 
 
5
  import gradio as gr
6
  import zipfile
7
  import shutil
8
+ import sys
9
  from pathlib import Path
10
  from torch import nn
11
  from torch.utils.data import DataLoader, TensorDataset
12
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
13
 
14
  # =========================================================
15
  # 1. KONFIGURASI & SETUP
16
  # =========================================================
 
 
 
17
 
18
+ LIST_LABEL = ['anger','anticipation','disgust','fear','joy','sadness','surprise','trust']
 
19
 
20
+ # Setup Path
21
+ def get_root_path():
22
+ if getattr(sys, 'frozen', False):
23
+ return Path(sys.executable).parent
24
+ else:
25
+ return Path(__file__).parent
26
 
27
+ BASE_DIR = get_root_path()
28
+ DIR_TRAINED = BASE_DIR / "saved_models" / "trained_local"
29
+ DIR_UPLOADED = BASE_DIR / "saved_models" / "uploaded_colab"
30
+ ACTIVE_MODEL_POINTER = BASE_DIR / "active_model_path.txt"
 
 
 
 
 
 
 
31
 
32
+ DIR_TRAINED.mkdir(parents=True, exist_ok=True)
33
+ DIR_UPLOADED.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
34
 
35
  # =========================================================
36
+ # 2. HELPER FUNCTIONS
37
  # =========================================================
38
  def clean_data(df):
39
+ # Cek kolom label dan tipenya
40
  for l in LIST_LABEL:
41
  if l not in df.columns: df[l] = 0
42
+ # Fix format koma (1,00 -> 1.00)
43
+ df[l] = df[l].astype(str).str.replace(',', '.', regex=False)
44
  df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)
45
 
46
  # Bersihkan teks
47
+ col_text = next((c for c in df.columns if c.lower() in ['text', 'kalimat', 'content', 'tweet']), None)
48
+ if col_text:
49
+ df["text_clean"] = df[col_text].astype(str).str.replace("\n", " ").str.strip()
50
+ elif "text" in df.columns:
51
+ df["text_clean"] = df["text"].astype(str).str.replace("\n", " ").str.strip()
52
+
53
  return df
54
 
55
  def get_active_model_path():
 
64
  f.write(str(path))
65
 
66
  # =========================================================
67
+ # 3. LOGIKA UPLOAD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # =========================================================
69
def handle_zip_upload(file_obj):
    """Extract an uploaded model ZIP and mark it as the active model.

    Args:
        file_obj: Gradio file object (``.name`` is the temp file path),
            or None when nothing was uploaded.

    Returns:
        (log_message, status_label) tuple; status_label is None on failure.
    """
    if file_obj is None: return "❌ Tidak ada file.", None
    try:
        # Clear out any previously uploaded model first.
        if DIR_UPLOADED.exists(): shutil.rmtree(DIR_UPLOADED)
        DIR_UPLOADED.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
            zip_ref.extractall(DIR_UPLOADED)

        # Handle ZIPs that wrap the model in a nested folder:
        # locate config.json anywhere in the tree to find the model root.
        config_path = list(DIR_UPLOADED.rglob("config.json"))

        if not config_path:
            return "❌ Error: Tidak ditemukan config.json di dalam zip.", None

        final_model_path = config_path[0].parent

        # Persist the validated path as the active model pointer.
        set_active_model_path(final_model_path)
        return f"βœ… Model berhasil dimuat!\nLokasi: {final_model_path}", "Model Upload (Siap)"
    except Exception as e:
        return f"❌ Error unzip: {str(e)}", None
93
 
94
  # =========================================================
95
+ # 4. LOGIKA PREDIKSI
96
  # =========================================================
97
def load_model_inference():
    """Load the active model and tokenizer for inference.

    Reads the active model directory from the pointer file and loads a
    sequence-classification model + tokenizer from it.

    Returns:
        (model, tokenizer) tuple, with the model already in eval mode.

    Raises:
        ValueError: if no active model is set, or loading fails
            (original exception chained as the cause).
    """
    path = get_active_model_path()
    if not path: raise ValueError("Belum ada model aktif. Upload dulu!")

    path = Path(path)
    try:
        tokenizer = AutoTokenizer.from_pretrained(str(path))
        model = AutoModelForSequenceClassification.from_pretrained(str(path))
        model.eval()  # inference mode: disables dropout etc.
        return model, tokenizer
    except Exception as e:
        # Chain the original exception so the root cause stays visible.
        raise ValueError(f"Gagal load model: {e}") from e
109
 
110
  def predict_text(text):
111
  if not text: return None
 
114
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
115
 
116
  with torch.no_grad():
117
+ out = model(**inputs)
118
+ probs = torch.sigmoid(out.logits).numpy()[0]
119
 
120
  return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
121
  except Exception as e:
 
123
 
124
  def predict_csv(file_obj, sep):
125
  try:
126
+ # Cek separator
127
+ try:
128
+ df = pd.read_csv(file_obj.name, sep=sep)
129
+ except:
130
+ df = pd.read_csv(file_obj.name, sep=",")
131
+
132
  df = clean_data(df)
133
  model, tokenizer = load_model_inference()
134
 
135
  results = []
136
+ # Cek kolom text
137
+ if "text_clean" not in df.columns: return {"Error": "Kolom teks tidak ditemukan"}
138
+
139
+ for txt in df["text_clean"]:
140
  inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
141
  with torch.no_grad():
142
+ out = model(**inputs)
143
+ probs = torch.sigmoid(out.logits).numpy()[0]
144
+
145
  results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
146
 
147
  # Hitung statistik
 
161
  return {"Error": str(e)}
162
 
163
  # =========================================================
164
+ # 5. TAMPILAN ANTARMUKA (UI GRADIO)
165
  # =========================================================
166
  with gr.Blocks(title="Emotion AI Manager") as app:
167
+ gr.Markdown("# 🧠 AI Emotion Classifier System")
168
 
169
  # Status Bar Global
170
+ lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model.", interactive=False)
171
+
172
+ with gr.Tabs():
173
+ # TAB 1: UPLOAD
174
+ with gr.Tab("πŸ“‚ Upload Model"):
175
+ gr.Markdown("Upload file `.zip` model hasil training.")
176
+ in_zip = gr.File(label="Upload File .zip", file_types=[".zip"])
177
+ btn_upload = gr.Button("Ekstrak & Aktifkan", variant="primary")
178
+ out_log_upload = gr.Textbox(label="Log Sistem")
 
 
 
 
 
179
 
180
+ btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
181
+
182
+ # TAB 2: PENGUJIAN
183
+ with gr.Tab("πŸ§ͺ Testing"):
184
+ with gr.Tabs():
185
+ # Sub-Tab 2.1: Uji Tunggal
186
+ with gr.Tab("πŸ“ Uji Tunggal"):
187
+ in_txt = gr.Textbox(label="Masukkan Kalimat", placeholder="Saya merasa...", lines=2)
188
+ btn_pred_txt = gr.Button("Prediksi", variant="primary")
189
+ out_lbl = gr.Label(label="Hasil Prediksi")
190
+
191
+ btn_pred_txt.click(predict_text, inputs=in_txt, outputs=out_lbl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
+ # Sub-Tab 2.2: Uji Batch
194
+ with gr.Tab("πŸ“Š Uji Batch (CSV)"):
195
+ in_csv_test = gr.File(label="Upload CSV Test")
196
+ in_sep_test = gr.Textbox(label="Separator", value=";")
197
+ btn_pred_csv = gr.Button("Analisis Batch")
198
+ out_json = gr.JSON(label="Hasil Analisis")
199
+
200
+ btn_pred_csv.click(predict_csv, inputs=[in_csv_test, in_sep_test], outputs=out_json)
201
+
202
+ if __name__ == "__main__":
203
+ app.queue().launch()