Spaces:

Darendra
/

BERT_Emotion_Classification

Sleeping

App Files Community

Darendra committed on Dec 11, 2025

Commit

dafa625

verified ·

1 Parent(s): 74dd21d

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -15

app.py CHANGED Viewed

@@ -28,6 +28,7 @@ ACTIVE_MODEL_POINTER = "active_model_path.txt"
 class ModelEmosi(nn.Module):
     def __init__(self, base_model_name, num_labels=8):
         super().__init__()
         self.config = AutoConfig.from_pretrained(base_model_name)
         self.base = AutoModel.from_pretrained(base_model_name)
         self.dropout = nn.Dropout(0.3)
@@ -38,6 +39,7 @@ class ModelEmosi(nn.Module):
         if hasattr(out, "pooler_output") and out.pooler_output is not None:
             x = out.pooler_output
         else:
             x = out.last_hidden_state[:, 0, :]
         return self.classifier(self.dropout(x))
@@ -45,9 +47,12 @@ class ModelEmosi(nn.Module):
 # 3. HELPER FUNCTIONS
 # =========================================================
 def clean_data(df):
     # Ensure every label column exists and is float-typed
     for l in LIST_LABEL:
         if l not in df.columns: df[l] = 0  # a missing label column is created and filled with 0
         df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)  # unparseable values become NaN, then 0
     # Clean the text column when present: newlines become spaces, surrounding whitespace is stripped
     if "text" in df.columns:
         df["text"] = df["text"].astype(str).str.replace("\n", " ").str.strip()  # NOTE(review): astype(str) presumably turns missing cells into the literal "nan" - confirm
     return df  # the same DataFrame object that was passed in, modified in place
@@ -64,7 +69,7 @@ def set_active_model_path(path):
         f.write(str(path))
 # =========================================================
-# 4. TRAINING LOGIC (CPU)
 # =========================================================
 def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Progress()):
     yield "⏳ Membaca dataset...", None
@@ -76,6 +81,7 @@ def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Pr
         return
     device = "cpu"
     model_name = "bert-base-multilingual-cased"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -107,18 +113,20 @@ def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Pr
             optimizer.step()
             total_loss += loss.item()
             if step % 5 == 0:
                 progress((ep * len(train_loader) + step) / (int(epochs) * len(train_loader)))
         avg_loss = total_loss / len(train_loader)
         log_text += f"✅ Epoch {ep+1} | Loss: {avg_loss:.4f}\n"
         yield log_text, None
     model.base.save_pretrained(DIR_TRAINED)
     tokenizer.save_pretrained(DIR_TRAINED)
     torch.save(model.classifier.state_dict(), DIR_TRAINED / "classifier_head.pt")
-    set_active_model_path(DIR_TRAINED)
     yield log_text + "\n🎉 Selesai & Disimpan!", "Model Lokal (Baru Dilatih)"
 # =========================================================
@@ -133,7 +141,7 @@ def handle_zip_upload(file_obj):
         with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
             zip_ref.extractall(DIR_UPLOADED)
-        # Handle jika ada subfolder
         files_in_dir = list(DIR_UPLOADED.iterdir())
         if len(files_in_dir) == 1 and files_in_dir[0].is_dir():
             subfolder = files_in_dir[0]
@@ -154,15 +162,14 @@ def load_model_inference():
     if not path: raise ValueError("Belum ada model aktif.")
     path = Path(path)
-    config = AutoConfig.from_pretrained(path)
     tokenizer = AutoTokenizer.from_pretrained(path)
     model = ModelEmosi(path)
     head_path = path / "classifier_head.pt"
     if head_path.exists():
         model.classifier.load_state_dict(torch.load(head_path, map_location="cpu"))
-    model.eval()
     return model, tokenizer
 def predict_text(text):
@@ -170,9 +177,11 @@ def predict_text(text):
     try:
         model, tokenizer = load_model_inference()
         inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
         with torch.no_grad():
             out = model(inputs["input_ids"], inputs["attention_mask"])
             probs = torch.sigmoid(out).numpy()[0]
         return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
     except Exception as e:
         return {"Error": str(e)}
@@ -182,6 +191,7 @@ def predict_csv(file_obj, sep):
         df = pd.read_csv(file_obj.name, sep=sep)
         df = clean_data(df)
         model, tokenizer = load_model_inference()
         results = []
         for txt in df["text"]:
             inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
@@ -190,21 +200,27 @@ def predict_csv(file_obj, sep):
                 probs = torch.sigmoid(out).numpy()[0]
             results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
         avg = {l: 0.0 for l in LIST_LABEL}
         for r in results:
             for l,v in r.items(): avg[l] += v
         for l in avg: avg[l] /= len(results)
         top3 = sorted(avg.items(), key=lambda x: x[1], reverse=True)[:3]
-        return {"Total Data": len(results), "Top 3 Emosi": {k: round(v,4) for k,v in top3}, "Rata-rata": avg}
     except Exception as e:
         return {"Error": str(e)}
 # =========================================================
-# 7. USER INTERFACE (UI)
 # =========================================================
 with gr.Blocks(title="Emotion AI Manager") as app:
-    gr.Markdown("# 🎭 AI Emotion Classifier System")
     # Status Bar Global
     lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model yang dipilih.", interactive=False)
@@ -213,9 +229,9 @@ with gr.Blocks(title="Emotion AI Manager") as app:
     with gr.Tab("⚙️ Pelatihan & Model"):
         with gr.Tabs():
-            # Sub-Tab 1.1: Upload (Paling Recommended)
-            with gr.Tab("📂 Upload Pretrained (Recommended)"):
-                gr.Markdown("Gunakan model hasil training GPU (Colab) agar cepat.")
                 in_zip = gr.File(label="Upload File .zip Model", file_types=[".zip"])
                 btn_upload = gr.Button("Ekstrak & Aktifkan Model", variant="primary")
                 out_log_upload = gr.Textbox(label="Log Sistem")
@@ -223,8 +239,8 @@ with gr.Blocks(title="Emotion AI Manager") as app:
                 btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
             # Sub-Tab 1.2: Latihan Manual
-            with gr.Tab("🏋️‍♀️ Latihan Manual (CPU)"):
-                gr.Markdown("⚠️ Lambat di Hugging Face Space. Gunakan data kecil saja.")
                 with gr.Row():
                     in_csv = gr.File(label="Dataset CSV")
                     in_sep = gr.Textbox(label="Separator", value=";")
@@ -239,7 +255,7 @@ with gr.Blocks(title="Emotion AI Manager") as app:
                 btn_train.click(run_training_generator, inputs=[in_csv, in_sep, in_ep, in_bs, in_lr], outputs=[out_log_train, lbl_active_model])
     # TAB UTAMA 2: PENGUJIAN
-    with gr.Tab("🧪 Pengujian (Testing)"):
         with gr.Tabs():
             # Sub-Tab 2.1: Uji Tunggal

 class ModelEmosi(nn.Module):
     def __init__(self, base_model_name, num_labels=8):
         super().__init__()
+        # Load config agar fleksibel (bisa baca dari folder atau nama model)
         self.config = AutoConfig.from_pretrained(base_model_name)
         self.base = AutoModel.from_pretrained(base_model_name)
         self.dropout = nn.Dropout(0.3)
         if hasattr(out, "pooler_output") and out.pooler_output is not None:
             x = out.pooler_output
         else:
+            # Fallback jika model tidak punya pooler (misal DistilBERT)
             x = out.last_hidden_state[:, 0, :]
         return self.classifier(self.dropout(x))
 # 3. HELPER FUNCTIONS
 # =========================================================
 def clean_data(df):
+    # Ensure every label column exists and is float-typed
     for l in LIST_LABEL:
         if l not in df.columns: df[l] = 0  # a missing label column is created and filled with 0
         df[l] = pd.to_numeric(df[l], errors='coerce').fillna(0).astype(float)  # unparseable values become NaN, then 0
+    # Clean the text: newlines become spaces, surrounding whitespace is stripped
     if "text" in df.columns:
         df["text"] = df["text"].astype(str).str.replace("\n", " ").str.strip()  # NOTE(review): astype(str) presumably turns missing cells into the literal "nan" - confirm
     return df  # the same DataFrame object that was passed in, modified in place
         f.write(str(path))
 # =========================================================
+# 4. TRAINING LOGIC (CPU - SMALL DATASETS ONLY)
 # =========================================================
 def run_training_generator(file_obj, sep, epochs, batch_size, lr, progress=gr.Progress()):
     yield "⏳ Membaca dataset...", None
         return
     device = "cpu"
+    # Default model dasar untuk training manual di CPU
     model_name = "bert-base-multilingual-cased"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
             optimizer.step()
             total_loss += loss.item()
+            # Update progress bar setiap 5 step
             if step % 5 == 0:
                 progress((ep * len(train_loader) + step) / (int(epochs) * len(train_loader)))
         avg_loss = total_loss / len(train_loader)
         log_text += f"✅ Epoch {ep+1} | Loss: {avg_loss:.4f}\n"
         yield log_text, None
+    # Simpan Model
     model.base.save_pretrained(DIR_TRAINED)
     tokenizer.save_pretrained(DIR_TRAINED)
     torch.save(model.classifier.state_dict(), DIR_TRAINED / "classifier_head.pt")
+    set_active_model_path(DIR_TRAINED)
     yield log_text + "\n🎉 Selesai & Disimpan!", "Model Lokal (Baru Dilatih)"
 # =========================================================
         with zipfile.ZipFile(file_obj.name, 'r') as zip_ref:
             zip_ref.extractall(DIR_UPLOADED)
+        # Handle jika zip membungkus folder (bukan isi file langsung)
         files_in_dir = list(DIR_UPLOADED.iterdir())
         if len(files_in_dir) == 1 and files_in_dir[0].is_dir():
             subfolder = files_in_dir[0]
     if not path: raise ValueError("Belum ada model aktif.")
     path = Path(path)
     tokenizer = AutoTokenizer.from_pretrained(path)
     model = ModelEmosi(path)
     head_path = path / "classifier_head.pt"
     if head_path.exists():
         model.classifier.load_state_dict(torch.load(head_path, map_location="cpu"))
+        model.eval()
     return model, tokenizer
 def predict_text(text):
     try:
         model, tokenizer = load_model_inference()
         inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
         with torch.no_grad():
             out = model(inputs["input_ids"], inputs["attention_mask"])
             probs = torch.sigmoid(out).numpy()[0]
         return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
     except Exception as e:
         return {"Error": str(e)}
         df = pd.read_csv(file_obj.name, sep=sep)
         df = clean_data(df)
         model, tokenizer = load_model_inference()
         results = []
         for txt in df["text"]:
             inputs = tokenizer(txt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
                 probs = torch.sigmoid(out).numpy()[0]
             results.append({LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))})
+        # Hitung statistik
         avg = {l: 0.0 for l in LIST_LABEL}
         for r in results:
             for l,v in r.items(): avg[l] += v
         for l in avg: avg[l] /= len(results)
         top3 = sorted(avg.items(), key=lambda x: x[1], reverse=True)[:3]
+        return {
+            "Total Data": len(results),
+            "Top 3 Emosi Dominan": {k: round(v,4) for k,v in top3},
+            "Rata-rata Skor": avg
+        }
     except Exception as e:
         return {"Error": str(e)}
 # =========================================================
+# 7. USER INTERFACE (GRADIO UI)
 # =========================================================
 with gr.Blocks(title="Emotion AI Manager") as app:
+    gr.Markdown("#AI Emotion Classifier System")
     # Status Bar Global
     lbl_active_model = gr.Textbox(label="Status Model Aktif", value="Belum ada model yang dipilih.", interactive=False)
     with gr.Tab("⚙️ Pelatihan & Model"):
         with gr.Tabs():
+            # Sub-Tab 1.1: Upload Pretrained Model
+            with gr.Tab("📂 Upload Pretrained Model"):
+                gr.Markdown("Sudah punya model terlatih? gunakan model hasil training model")
                 in_zip = gr.File(label="Upload File .zip Model", file_types=[".zip"])
                 btn_upload = gr.Button("Ekstrak & Aktifkan Model", variant="primary")
                 out_log_upload = gr.Textbox(label="Log Sistem")
                 btn_upload.click(handle_zip_upload, inputs=in_zip, outputs=[out_log_upload, lbl_active_model])
             # Sub-Tab 1.2: Latihan Manual
+            with gr.Tab("🏋️‍♀️ Latihan Manual"):
+                gr.Markdown("Belum punya model? latih file csv [text;label emosi (1/0)]")
                 with gr.Row():
                     in_csv = gr.File(label="Dataset CSV")
                     in_sep = gr.Textbox(label="Separator", value=";")
                 btn_train.click(run_training_generator, inputs=[in_csv, in_sep, in_ep, in_bs, in_lr], outputs=[out_log_train, lbl_active_model])
     # TAB UTAMA 2: PENGUJIAN
+    with gr.Tab("🧪 Testing"):
         with gr.Tabs():
             # Sub-Tab 2.1: Uji Tunggal