Spaces:

Darendra
/

BERT_Emotion_Classification

Sleeping

App Files Files Community

Darendra committed on Dec 7, 2025

Commit

1a47d90

verified ·

1 Parent(s): 7db873d

Update app.py

Browse files

Files changed (1) hide show

app.py +248 -240

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # ==============================================================
 #                       EMOTION CLASSIFIER
 # ==============================================================
 import os
 import math
 import torch
@@ -9,7 +8,6 @@ import pandas as pd
 import numpy as np
 import gradio as gr
 import matplotlib.pyplot as plt
 from pathlib import Path
 from torch import nn
 from torch.utils.data import Dataset, DataLoader, TensorDataset
@@ -24,97 +22,96 @@ from transformers import (
 # =========================================================
 # CONFIG
 # =========================================================
-LABELS = ['anger','anticipation','disgust','fear','joy','sadness','surprise','trust']
-LABEL2ID = {l:i for i,l in enumerate(LABELS)}
-ID2LABEL = {i:l for i,l in enumerate(LABELS)}
-SAVED_ROOT = Path("saved_models")
-SAVED_ROOT.mkdir(exist_ok=True)
 # ==============================================================
-#                     Simpan dan Muat Data
 # ==============================================================
-def read_uploaded_file(uploaded):
-    if uploaded is None:
-        raise ValueError("No file provided")
-    if isinstance(uploaded, str):
-        return uploaded
-    if hasattr(uploaded, "name"):
-        return uploaded.name
-    if hasattr(uploaded, "read"):
-        tmp = Path("/tmp") / f"uploaded_{np.random.randint(1e9)}.csv"
-        with open(tmp, "wb") as f:
-            f.write(uploaded.read())
-        return str(tmp)
-    raise ValueError("Unsupported uploaded file type")
-def save_last_model_name(name):
-    (SAVED_ROOT / "last_model.txt").write_text(name)
-def load_last_model_name():
-    p = SAVED_ROOT / "last_model.txt"
-    if p.exists():
-        return p.read_text().strip()
     return None
-def model_folder(model_name):
-    return SAVED_ROOT / model_name.replace("/", "_")
 # ==============================================================
-#                        Pembersihan Data
 # ==============================================================
 def clean_labels(df):
-    for l in LABELS:
         if l not in df.columns:
             df[l] = 0
     return df
 def clean_text(df, col="text"):
     if col not in df.columns:
-        raise KeyError(f"CSV must contain a column '{col}'")
     df[col] = df[col].astype(str).str.replace("\n", " ").str.strip()
     return df
 # =========================================================
-#                        Model AI
 # =========================================================
-class EmotionModel(nn.Module):
-    """Consistent backbone + dropout + classifier."""
     def __init__(self, base_model_name, num_labels=8):
         super().__init__()
         self.config = AutoConfig.from_pretrained(base_model_name)
         self.base = AutoModel.from_pretrained(base_model_name)
-        self.drop = nn.Dropout(0.3)
-        self.clf = nn.Linear(self.config.hidden_size, num_labels)
-    def forward(self, ids, mask):
         out = self.base(
-            input_ids=ids,
-            attention_mask=mask
         )
-        # Prefer pooler_output if exists
         if hasattr(out, "pooler_output") and out.pooler_output is not None:
             x = out.pooler_output
         else:
             x = out.last_hidden_state[:, 0, :]
-        x = self.drop(x)
-        return self.clf(x)
 # ==============================================================
-#                       Tokenisasi Dataset
 # ==============================================================
 def tokenize_batch(texts, tokenizer, max_len=128):
     return tokenizer(
@@ -125,63 +122,55 @@ def tokenize_batch(texts, tokenizer, max_len=128):
         return_tensors="pt"
     )
-def build_tensor_dataset(df, tokenizer, max_len=128):
-    enc = tokenize_batch(df["text"].tolist(), tokenizer, max_len)
-    labels = torch.tensor(df[LABELS].values, dtype=torch.float)
     return TensorDataset(
-        enc["input_ids"],
-        enc["attention_mask"],
         labels
     )
 # ==============================================================
-#                           Bobot
 # ==============================================================
-def compute_pos_weight(df):
-    counts = df[LABELS].sum(axis=0)
     N = len(df)
     pw = []
     for c in counts:
         pw.append((N - c) / c if c > 0 else 1.0)
     return torch.tensor(pw, dtype=torch.float)
 # ==============================================================
-#                       Simpan dan Muat Model
 # ==============================================================
 def save_model(model, tokenizer, folder):
     os.makedirs(folder, exist_ok=True)
-    # Save backbone HF style
     model.base.save_pretrained(folder)
     tokenizer.save_pretrained(folder)
-    # Save classifier head
-    torch.save(model.clf.state_dict(), str(Path(folder) / "classifier.pt"))
-    # Save last-used name
-    save_last_model_name(str(folder))
 def load_model(folder):
     folder = str(folder)
     config = AutoConfig.from_pretrained(folder)
     tokenizer = AutoTokenizer.from_pretrained(folder)
-    model = EmotionModel(folder)
-    state = torch.load(f"{folder}/classifier.pt", map_location="cpu")
-    model.clf.load_state_dict(state)
     model.eval()
     return model, tokenizer, config
 # ==============================================================
-#                            Pelatihan
 # ==============================================================
-def train_model(
     df,
     model_name="bert-base-multilingual-cased",
     epochs=3,
@@ -195,32 +184,30 @@ def train_model(
     device=None
 ):
     device = device or ("cuda" if torch.cuda.is_available() else "cpu")
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     df = df.reset_index(drop=True)
-    dataset = build_tensor_dataset(df, tokenizer, max_len)
-    idx = list(range(len(dataset)))
     train_idx, val_idx = train_test_split(idx, test_size=0.15, random_state=42)
-    def subset(ds, idxs):
         return TensorDataset(
-            torch.stack([ds[i][0] for i in idxs]),
-            torch.stack([ds[i][1] for i in idxs]),
-            torch.stack([ds[i][2] for i in idxs]),
         )
-    train_ds = subset(dataset, train_idx)
-    val_ds = subset(dataset, val_idx)
     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
     val_loader = DataLoader(val_ds, batch_size=batch_size)
-    model = EmotionModel(model_name)
     model.to(device)
-    # Freeze lower layers
     for name, param in model.base.named_parameters():
         if name.startswith("embeddings."):
             param.requires_grad = False
@@ -231,91 +218,93 @@ def train_model(
                     param.requires_grad = False
             except:
                 pass
-    pos_weight = compute_pos_weight(df).to(device)
     loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
     optimizer = torch.optim.AdamW(
         filter(lambda p: p.requires_grad, model.parameters()),
         lr=lr,
         weight_decay=weight_decay
     )
     total_steps = len(train_loader) * epochs
     warmup_steps = int(warmup_ratio * total_steps)
     scheduler = get_linear_schedule_with_warmup(
         optimizer,
         num_warmup_steps=warmup_steps,
         num_training_steps=total_steps
     )
-    best_val = float("inf")
     no_improve = 0
     history = {"train_loss": [], "val_loss": []}
-    save_path = str(model_folder(model_name))
     for ep in range(1, epochs+1):
         model.train()
-        t_loss = 0
-        for input_ids, attn, labels in train_loader:
             input_ids = input_ids.to(device)
-            attn = attn.to(device)
             labels = labels.to(device)
             optimizer.zero_grad()
-            logits = model(input_ids, attn)
             loss = loss_fn(logits, labels)
             loss.backward()
             optimizer.step()
             scheduler.step()
-            t_loss += loss.item() * input_ids.size(0)
-        train_loss = t_loss / len(train_loader.dataset)
-        history["train_loss"].append(train_loss)
-        # Validation
         model.eval()
-        v_loss = 0
         with torch.no_grad():
-            for input_ids, attn, labels in val_loader:
                 input_ids = input_ids.to(device)
-                attn = attn.to(device)
                 labels = labels.to(device)
-                logits = model(input_ids, attn)
                 loss = loss_fn(logits, labels)
-                v_loss += loss.item() * input_ids.size(0)
-        val_loss = v_loss / len(val_loader.dataset)
-        history["val_loss"].append(val_loss)
-        print(f"Epoch {ep} | Train={train_loss:.4f} | Val={val_loss:.4f}")
-        if val_loss < best_val:
-            best_val = val_loss
             no_improve = 0
             save_model(model, tokenizer, save_path)
-            print(f"Saved best model to {save_path}")
         else:
             no_improve += 1
             if no_improve >= patience:
-                print("Early stopping.")
                 break
     return model, tokenizer, history
 # ==============================================================
-#                              Uji
 # ==============================================================
-def predict_single(text, folder=None):
-    folder = folder or load_last_model_name()
-    model, tokenizer, cfg = load_model(folder)
     encoded = tokenizer(
         text,
         padding="max_length",
@@ -323,150 +312,169 @@ def predict_single(text, folder=None):
         max_length=128,
         return_tensors="pt"
     )
     with torch.no_grad():
         out = model(encoded["input_ids"], encoded["attention_mask"])
         probs = torch.sigmoid(out).numpy()[0]
-    return {LABELS[i]: float(probs[i]) for i in range(len(LABELS))}
-def predict_batch(texts, folder=None, batch_size=32):
-    folder = folder or load_last_model_name()
-    model, tokenizer, cfg = load_model(folder)
     preds = []
-    for i in range(0, len(texts), batch_size):
-        batch = texts[i:i+batch_size]
-        enc = tokenizer(
             batch,
             padding="max_length",
             truncation=True,
             max_length=128,
             return_tensors="pt"
         )
         with torch.no_grad():
-            out = model(enc["input_ids"], enc["attention_mask"])
             probs = torch.sigmoid(out).numpy()
         for p in probs:
-            preds.append({LABELS[j]: float(p[j]) for j in range(len(LABELS))})
     return preds
-def summarize_preds(preds):
-    avg = {l: 0.0 for l in LABELS}
     n = len(preds)
     for p in preds:
         for l,v in p.items():
             avg[l] += v
     for l in avg:
         avg[l] /= n
     top3 = sorted(avg.items(), key=lambda x: x[1], reverse=True)[:3]
-    top3 = [{"label":l, "score":float(s)} for l,s in top3]
-    return {"n":n, "avg_distribution":avg, "top3":top3}
 # ==============================================================
-#                             GRADIO GUI
 # ==============================================================
-def wrapper_train(file_obj, sep, model_name, epochs, batch_size, lr,
-                  max_len, weight_decay, warmup_ratio, patience, freeze_layers):
-    csv = read_uploaded_file(file_obj)
-    df = pd.read_csv(csv, sep=sep)
     df = clean_labels(df)
     df = clean_text(df)
-    _, _, history = train_model(
         df=df,
         model_name=model_name,
-        epochs=int(epochs),
-        batch_size=int(batch_size),
         lr=float(lr),
         max_len=int(max_len),
-        weight_decay=float(weight_decay),
-        warmup_ratio=float(warmup_ratio),
-        patience=int(patience),
-        freeze_layers=int(freeze_layers)
     )
     return {
-        "message": "Training finished.",
         "history": history,
-        "model_name": model_name
     }
-def wrapper_single(text):
-    return predict_single(text)
-def wrapper_dataset(file_obj, sep, max_len, batch_size):
-    csv = read_uploaded_file(file_obj)
-    df = pd.read_csv(csv, sep=sep)
     df = clean_labels(df)
     df = clean_text(df)
     preds = predict_batch(df["text"].tolist(), batch_size=int(batch_size))
-    return summarize_preds(preds)
 # ==============================================================
-#                         Menjalankan GRADIO
 # ==============================================================
 with gr.Blocks() as app:
-    gr.Markdown("## Emotion Classifier — Dava (Final Version)")
-    with gr.Tab("Training"):
-        file_in = gr.File(label="Upload Training CSV")
-        sep_in = gr.Textbox(label="Delimiter", value=",")
-        model_name_in = gr.Dropdown(
-            label="Backbone Model",
             choices=["bert-base-multilingual-cased", "indobert-base-p1"],
             value="bert-base-multilingual-cased"
         )
-        epochs_in = gr.Number(label="Epochs", value=3)
-        bs_in = gr.Number(label="Batch Size", value=8)
-        lr_in = gr.Number(label="Learning Rate", value=2e-5)
-        maxlen_in = gr.Number(label="Max Length", value=128)
-        wd_in = gr.Number(label="Weight Decay", value=0.01)
-        warmup_in = gr.Number(label="Warmup Ratio", value=0.1)
-        patience_in = gr.Number(label="Patience", value=2)
-        freeze_in = gr.Number(label="Freeze Layers", value=6)
-        btn_train = gr.Button("Start Training")
-        out_train = gr.JSON(label="Train Result")
         btn_train.click(
-            wrapper_train,
-            inputs=[file_in, sep_in, model_name_in, epochs_in, bs_in,
-                    lr_in, maxlen_in, wd_in, warmup_in, patience_in, freeze_in],
             outputs=out_train
         )
-    with gr.Tab("Single Prediction"):
-        text_in = gr.Textbox(label="Text")
-        btn_single = gr.Button("Predict")
-        out_single = gr.JSON(label="Emotion Scores")
-        btn_single.click(wrapper_single, inputs=[text_in], outputs=out_single)
-    with gr.Tab("Dataset Prediction"):
-        file_test = gr.File(label="Upload CSV")
-        sep_test = gr.Textbox(label="Delimiter", value=",")
-        maxlen_test = gr.Number(label="Max Length", value=128)
-        bs_test = gr.Number(label="Batch Size", value=32)
         btn_test = gr.Button("Run Prediction")
-        out_test = gr.JSON(label="Summary Result")
         btn_test.click(
-            wrapper_dataset,
-            inputs=[file_test, sep_test, maxlen_test, bs_test],
             outputs=out_test
         )

 # ==============================================================
 #                       EMOTION CLASSIFIER
 # ==============================================================
 import os
 import math
 import torch
 import numpy as np
 import gradio as gr
 import matplotlib.pyplot as plt
 from pathlib import Path
 from torch import nn
 from torch.utils.data import Dataset, DataLoader, TensorDataset
 # =========================================================
 # CONFIG
 # =========================================================
# The eight emotion classes; their order fixes the label column order.
LIST_LABEL = [
    'anger', 'anticipation', 'disgust', 'fear',
    'joy', 'sadness', 'surprise', 'trust',
]
# Lookup tables between a label name and its position in LIST_LABEL.
LABEL2ID = dict(zip(LIST_LABEL, range(len(LIST_LABEL))))
ID2LABEL = dict(enumerate(LIST_LABEL))
# Root directory for saved models; created at import time when missing.
FOLDER_MODEL = Path("saved_models")
FOLDER_MODEL.mkdir(exist_ok=True)
 # ==============================================================
+#                     File & Utils
 # ==============================================================
def read_file_upload(file_obj):
    """Resolve a Gradio file upload to a path on disk.

    Args:
        file_obj: A path (``str`` or ``os.PathLike``), an object exposing its
            path through a ``name`` attribute, or a binary stream with a
            ``read()`` method.

    Returns:
        str: Path of a file holding the uploaded content.

    Raises:
        ValueError: If ``file_obj`` is ``None`` or of an unsupported type.
    """
    import tempfile  # local import: only the stream branch needs it

    if file_obj is None:
        raise ValueError("File belum diupload.")
    # Path-like inputs must be handled before the ``name`` branch:
    # ``pathlib.Path.name`` is only the final component, not the full path.
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    # File wrapper object: its ``name`` attribute is returned as the path.
    if hasattr(file_obj, "name"):
        return file_obj.name
    # Raw binary stream: spill it to a uniquely named temp file so that
    # concurrent uploads cannot overwrite each other and no directory
    # is hard-coded.
    if hasattr(file_obj, "read"):
        with tempfile.NamedTemporaryFile(
            mode="wb", prefix="upload_", suffix=".csv", delete=False
        ) as f:
            f.write(file_obj.read())
        return f.name
    raise ValueError("Tipe file tidak didukung.")
+# --- FUNGSI YANG DIUBAH (LEBIH SINGKAT) ---
def save_last_model(name):
    """Record ``name`` in the marker file that tracks the last saved model."""
    marker = FOLDER_MODEL / "last_model_name.txt"
    marker.write_text(name)
def load_last_model():
    """Return the model path recorded by the last save, if any.

    Returns:
        str | None: Stripped content of ``last_model_name.txt`` inside
        ``FOLDER_MODEL``, or ``None`` when the file is missing or blank.
    """
    path_file = FOLDER_MODEL / "last_model_name.txt"
    try:
        content = path_file.read_text().strip()
    except FileNotFoundError:
        return None
    # Callers guard with ``is None``; a blank marker must not leak out as "".
    return content or None
+# ------------------------------------------
def get_model_path(model_name):
    """Map a model name to its directory under ``FOLDER_MODEL``.

    Every "/" in the name is replaced by "_" to form the directory name.
    """
    folder_name = "_".join(model_name.split("/"))
    return FOLDER_MODEL / folder_name
 # ==============================================================
+#                        Data Cleaning
 # ==============================================================
 def clean_labels(df):
+    """Isi label kosong dengan 0."""
+    for l in LIST_LABEL:
         if l not in df.columns:
             df[l] = 0
     return df
 def clean_text(df, col="text"):
+    """Hapus enter dan spasi berlebih."""
     if col not in df.columns:
+        raise KeyError(f"CSV harus punya kolom '{col}'")
     df[col] = df[col].astype(str).str.replace("\n", " ").str.strip()
     return df
 # =========================================================
+#                        Model Architecture
 # =========================================================
class ModelEmosi(nn.Module):
    """Pretrained transformer backbone followed by dropout and a linear head."""

    def __init__(self, base_model_name, num_labels=8):
        """Load config and backbone from ``base_model_name`` and build the head."""
        super().__init__()
        self.config = AutoConfig.from_pretrained(base_model_name)
        self.base = AutoModel.from_pretrained(base_model_name)
        self.dropout = nn.Dropout(0.3)
        hidden_dim = self.config.hidden_size
        self.classifier = nn.Linear(hidden_dim, num_labels)

    def forward(self, input_ids, attention_mask):
        """Run the backbone and return the classifier output for the batch."""
        encoded = self.base(input_ids=input_ids, attention_mask=attention_mask)
        pooled = getattr(encoded, "pooler_output", None)
        if pooled is None:
            # No pooled output available: use the hidden state at position 0.
            pooled = encoded.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(pooled))
 # ==============================================================
+#                       Tokenizer & Dataset
 # ==============================================================
 def tokenize_batch(texts, tokenizer, max_len=128):
     return tokenizer(
         return_tensors="pt"
     )
def create_dataset(df, tokenizer, max_len=128):
    """Tokenise ``df["text"]`` and bundle it with the label columns.

    Returns:
        TensorDataset whose items are (input_ids, attention_mask, labels),
        with labels taken from the ``LIST_LABEL`` columns as floats.
    """
    texts = df["text"].tolist()
    encodings = tokenize_batch(texts, tokenizer, max_len)
    label_matrix = df[LIST_LABEL].values
    labels = torch.tensor(label_matrix, dtype=torch.float)
    ids = encodings["input_ids"]
    mask = encodings["attention_mask"]
    return TensorDataset(ids, mask, labels)
 # ==============================================================
+#                           Weights
 # ==============================================================
def hitung_pos_weight(df):
    """Compute one positive-class weight per label to offset class imbalance.

    For a label with ``c`` positive rows out of ``N`` the weight is
    ``(N - c) / c``; a label with no positive rows gets 1.0.

    Returns:
        Float tensor with one weight per entry of ``LIST_LABEL``.
    """
    positives = df[LIST_LABEL].sum(axis=0)
    total = len(df)
    weights = [(total - c) / c if c > 0 else 1.0 for c in positives]
    return torch.tensor(weights, dtype=torch.float)
 # ==============================================================
+#                       Save & Load Logic
 # ==============================================================
 def save_model(model, tokenizer, folder):
     os.makedirs(folder, exist_ok=True)
     model.base.save_pretrained(folder)
     tokenizer.save_pretrained(folder)
+    torch.save(model.classifier.state_dict(), str(Path(folder) / "classifier_head.pt"))
+    # Update panggilan fungsi di sini
+    save_last_model(str(folder))
 def load_model(folder):
     folder = str(folder)
     config = AutoConfig.from_pretrained(folder)
     tokenizer = AutoTokenizer.from_pretrained(folder)
+    model = ModelEmosi(folder)
+    state = torch.load(f"{folder}/classifier_head.pt", map_location="cpu")
+    model.classifier.load_state_dict(state)
     model.eval()
     return model, tokenizer, config
 # ==============================================================
+#                            TRAINING
 # ==============================================================
+def jalankan_training(
     df,
     model_name="bert-base-multilingual-cased",
     epochs=3,
     device=None
 ):
     device = device or ("cuda" if torch.cuda.is_available() else "cpu")
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     df = df.reset_index(drop=True)
+    full_dataset = create_dataset(df, tokenizer, max_len)
+    idx = list(range(len(full_dataset)))
     train_idx, val_idx = train_test_split(idx, test_size=0.15, random_state=42)
+    def get_subset(ds, indices):
         return TensorDataset(
+            torch.stack([ds[i][0] for i in indices]),
+            torch.stack([ds[i][1] for i in indices]),
+            torch.stack([ds[i][2] for i in indices]),
         )
+    train_ds = get_subset(full_dataset, train_idx)
+    val_ds = get_subset(full_dataset, val_idx)
     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
     val_loader = DataLoader(val_ds, batch_size=batch_size)
+    model = ModelEmosi(model_name)
     model.to(device)
     for name, param in model.base.named_parameters():
         if name.startswith("embeddings."):
             param.requires_grad = False
                     param.requires_grad = False
             except:
                 pass
+    pos_weight = hitung_pos_weight(df).to(device)
     loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
     optimizer = torch.optim.AdamW(
         filter(lambda p: p.requires_grad, model.parameters()),
         lr=lr,
         weight_decay=weight_decay
     )
     total_steps = len(train_loader) * epochs
     warmup_steps = int(warmup_ratio * total_steps)
     scheduler = get_linear_schedule_with_warmup(
         optimizer,
         num_warmup_steps=warmup_steps,
         num_training_steps=total_steps
     )
+    best_val_loss = float("inf")
     no_improve = 0
     history = {"train_loss": [], "val_loss": []}
+    save_path = str(get_model_path(model_name))
     for ep in range(1, epochs+1):
         model.train()
+        total_train_loss = 0
+        for input_ids, mask, labels in train_loader:
             input_ids = input_ids.to(device)
+            mask = mask.to(device)
             labels = labels.to(device)
             optimizer.zero_grad()
+            logits = model(input_ids, mask)
             loss = loss_fn(logits, labels)
             loss.backward()
             optimizer.step()
             scheduler.step()
+            total_train_loss += loss.item() * input_ids.size(0)
+        avg_train_loss = total_train_loss / len(train_loader.dataset)
+        history["train_loss"].append(avg_train_loss)
         model.eval()
+        total_val_loss = 0
         with torch.no_grad():
+            for input_ids, mask, labels in val_loader:
                 input_ids = input_ids.to(device)
+                mask = mask.to(device)
                 labels = labels.to(device)
+                logits = model(input_ids, mask)
                 loss = loss_fn(logits, labels)
+                total_val_loss += loss.item() * input_ids.size(0)
+        avg_val_loss = total_val_loss / len(val_loader.dataset)
+        history["val_loss"].append(avg_val_loss)
+        print(f"Epoch {ep} | Train Loss={avg_train_loss:.4f} | Val Loss={avg_val_loss:.4f}")
+        if avg_val_loss < best_val_loss:
+            best_val_loss = avg_val_loss
             no_improve = 0
             save_model(model, tokenizer, save_path)
+            print(f"Best model saved to {save_path}")
         else:
             no_improve += 1
             if no_improve >= patience:
+                print("Early stopping triggered.")
                 break
     return model, tokenizer, history
 # ==============================================================
+#                            PREDICTION
 # ==============================================================
+def predict_satu(text, folder=None):
+    # Update panggilan fungsi di sini
+    folder = folder or load_last_model()
+    if folder is None:
+        return {"Error": "Belum ada model yang dilatih."}
+    model, tokenizer, _ = load_model(folder)
     encoded = tokenizer(
         text,
         padding="max_length",
         max_length=128,
         return_tensors="pt"
     )
     with torch.no_grad():
         out = model(encoded["input_ids"], encoded["attention_mask"])
         probs = torch.sigmoid(out).numpy()[0]
+    return {LIST_LABEL[i]: float(probs[i]) for i in range(len(LIST_LABEL))}
def predict_batch(text_list, folder=None, batch_size=32, max_len=128):
    """Score a list of texts with a saved model.

    Args:
        text_list: Texts to classify.
        folder: Model directory; defaults to the last saved model.
        batch_size: Number of texts tokenised and scored per forward pass.
        max_len: Padding/truncation length for the tokenizer. Previously
            fixed at 128; exposed so it can match the training length.

    Returns:
        list[dict]: One ``{label: probability}`` dict per input text, or an
        empty list when no model is available.
    """
    folder = folder or load_last_model()
    # Treat a blank path like a missing one instead of trying to load it.
    if not folder:
        return []
    model, tokenizer, _ = load_model(folder)
    preds = []
    for start in range(0, len(text_list), batch_size):
        batch = text_list[start:start + batch_size]
        encoded = tokenizer(
            batch,
            padding="max_length",
            truncation=True,
            max_length=max_len,
            return_tensors="pt"
        )
        with torch.no_grad():
            out = model(encoded["input_ids"], encoded["attention_mask"])
            # Sigmoid gives an independent probability per label.
            probs = torch.sigmoid(out).numpy()
        preds.extend(dict(zip(LIST_LABEL, map(float, row))) for row in probs)
    return preds
def summarize_result(preds):
    """Aggregate per-text label scores into an average and a top-3 ranking.

    Args:
        preds: List of ``{label: score}`` dicts.

    Returns:
        dict: Row count, mean score per label and the three labels with the
        highest mean; an info message when ``preds`` is empty.
    """
    if not preds:
        return {"Info": "Tidak ada hasil."}
    n = len(preds)
    totals = dict.fromkeys(LIST_LABEL, 0.0)
    for scores in preds:
        for label, value in scores.items():
            totals[label] += value
    avg = {label: total / n for label, total in totals.items()}
    ranked = sorted(avg.items(), key=lambda pair: pair[1], reverse=True)
    top3_fmt = [
        {"label": label, "score": float(score)}
        for label, score in ranked[:3]
    ]
    return {
        "jumlah_data": n,
        "distribusi_rata2": avg,
        "top_3": top3_fmt,
    }
 # ==============================================================
+#                             GRADIO UI
 # ==============================================================
def wrapper_training(file_obj, sep, model_name, epoch, batch, lr,
                     max_len, wd, warmup, pat, freeze):
    """Gradio callback: load the uploaded CSV, clean it and run training.

    The numeric UI values are cast to int/float before being passed on.

    Returns:
        dict: Status message, loss history and the base model name used.
    """
    df = pd.read_csv(read_file_upload(file_obj), sep=sep)
    df = clean_text(clean_labels(df))
    hyperparams = dict(
        epochs=int(epoch),
        batch_size=int(batch),
        lr=float(lr),
        max_len=int(max_len),
        weight_decay=float(wd),
        warmup_ratio=float(warmup),
        patience=int(pat),
        freeze_layers=int(freeze),
    )
    _, _, history = jalankan_training(df=df, model_name=model_name, **hyperparams)
    result = {"status": "Training Selesai!"}
    result["history"] = history
    result["model_used"] = model_name
    return result
def wrapper_predict_satu(text):
    """Gradio callback: forward ``text`` to ``predict_satu`` with its default model."""
    scores = predict_satu(text)
    return scores
def wrapper_predict_dataset(file_obj, sep, batch_size):
    """Gradio callback: score every row of an uploaded CSV and summarise.

    Returns:
        dict: The summary produced by ``summarize_result``.
    """
    csv_path = read_file_upload(file_obj)
    df = clean_text(clean_labels(pd.read_csv(csv_path, sep=sep)))
    texts = df["text"].tolist()
    predictions = predict_batch(texts, batch_size=int(batch_size))
    return summarize_result(predictions)
 # ==============================================================
+#                         INTERFACE
 # ==============================================================
 with gr.Blocks() as app:
+    gr.Markdown("## Emotion Classifier — IndoBERT / Multilingual")
+    with gr.Tab("Menu Training"):
+        gr.Markdown("Upload dataset CSV untuk fine-tuning model.")
+        in_file = gr.File(label="Upload File CSV")
+        in_sep = gr.Textbox(label="Delimiter (Pemisah)", value=";")
+        in_model = gr.Dropdown(
+            label="Base Model",
             choices=["bert-base-multilingual-cased", "indobert-base-p1"],
             value="bert-base-multilingual-cased"
         )
+        with gr.Row():
+            in_epoch = gr.Number(label="Epochs", value=3)
+            in_batch = gr.Number(label="Batch Size", value=8)
+            in_lr = gr.Number(label="Learning Rate", value=2e-5)
+        with gr.Row():
+            in_len = gr.Number(label="Max Length", value=128)
+            in_pat = gr.Number(label="Patience (Early Stop)", value=2)
+            in_freeze = gr.Number(label="Freeze Layers", value=6)
+        # Hidden advanced params
+        in_wd = gr.Number(label="Weight Decay", value=0.01, visible=False)
+        in_warmup = gr.Number(label="Warmup Ratio", value=0.1, visible=False)
+        btn_train = gr.Button("Mulai Training", variant="primary")
+        out_train = gr.JSON(label="Training Log")
         btn_train.click(
+            wrapper_training,
+            inputs=[in_file, in_sep, in_model, in_epoch, in_batch,
+                    in_lr, in_len, in_wd, in_warmup, in_pat, in_freeze],
             outputs=out_train
         )
+    with gr.Tab("Tes Satu Kalimat"):
+        in_text = gr.Textbox(label="Input Teks", placeholder="Contoh: Aku senang sekali hari ini...")
+        btn_satu = gr.Button("Prediksi")
+        out_satu = gr.Label(label="Confidence Score")
+        btn_satu.click(wrapper_predict_satu, inputs=[in_text], outputs=out_satu)
+    with gr.Tab("Tes Satu File"):
+        gr.Markdown("Upload file CSV baru untuk prediksi massal.")
+        in_file_test = gr.File(label="Upload CSV")
+        in_sep_test = gr.Textbox(label="Delimiter", value=";")
+        in_bs_test = gr.Number(label="Batch Size", value=32)
         btn_test = gr.Button("Run Prediction")
+        out_test = gr.JSON(label="Summary")
         btn_test.click(
+            wrapper_predict_dataset,
+            inputs=[in_file_test, in_sep_test, in_bs_test],
             outputs=out_test
         )