TrioF committed on
Commit
75d06a1
·
verified ·
1 Parent(s): 0cb66b9

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +74 -0
  2. model.py +33 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoConfig
4
+ from huggingface_hub import hf_hub_url
5
+ import os
6
+
7
+ # Impor kelas kustom Anda secara eksplisit
8
+ from model import IndoBERTClassifier
9
+
10
# --- Configuration and model loading ---
MODEL_ID = "TrioF/KlikBERT"

# Load the tokenizer and the config from the Hub.
config = AutoConfig.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Build the custom classifier, then load the fine-tuned weights from the Hub.
#
# The checkpoint is fetched with hf_hub_download, which stores it in the
# Hugging Face cache keyed by repository and revision.  The previous approach
# (torch.hub.load_state_dict_from_url) caches by bare file name only, so a
# "pytorch_model.bin" left behind by another repository or an older revision
# could be picked up silently instead of this model's weights.
from huggingface_hub import hf_hub_download  # only needed for this step

model = IndoBERTClassifier(config)
model_path = hf_hub_download(repo_id=MODEL_ID, filename="pytorch_model.bin")
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # inference only: disables dropout

# --- Label mapping ---
# config.json must provide 'custom_id2label' with 'clickbait' and 'kategori'
# entries; each maps a stringified class index to its label.
id2label_clickbait = config.custom_id2label['clickbait']
id2label_kategori = config.custom_id2label['kategori']
27
+
28
+
29
+ # --- Fungsi Prediksi ---
30
def predict(judul, isi):
    """Predict the clickbait label and the news category for one article.

    Args:
        judul: Title text of the article.
        isi: Body text of the article.

    Returns:
        A ``(clickbait_label, kategori_label)`` tuple.  Two separate values
        are returned (not a dictionary) so each one can feed its own output
        component.
    """
    # Title and body are encoded together as a pair, truncated to 512 tokens.
    encoded = tokenizer(
        judul,
        isi,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
    )

    # No gradients are needed for inference.
    with torch.no_grad():
        head_logits = model(**encoded)

    # Highest-scoring class index for each head (batch of one -> .item()).
    clickbait_idx, kategori_idx = (
        head_logits[key].argmax(dim=1).item()
        for key in ("clickbait_logits", "kategori_logits")
    )

    # The label maps are keyed by the class index as a string.
    return (
        id2label_clickbait[str(clickbait_idx)],
        id2label_kategori[str(kategori_idx)],
    )
55
+
56
+
57
# --- Gradio interface ---
title = "Model Multi-Task KlikBERT"
description = "Model ini memprediksi apakah judul clickbait dan apa kategori beritanya. Model ini dimuat dari repositori TrioF/KlikBERT."

# Two free-text inputs: the news title and the news body.
inputs = [
    gr.Textbox(
        lines=2,
        label="Judul Berita",
        placeholder="Masukkan judul berita di sini...",
    ),
    gr.Textbox(
        lines=10,
        label="Isi Berita",
        placeholder="Masukkan isi berita di sini...",
    ),
]

# Two separate output components, one per value returned by predict().
outputs = [
    gr.Text(label="Prediksi Clickbait"),
    gr.Text(label="Prediksi Kategori Berita"),
]

iface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
iface.launch()
model.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Nama file: model.py
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from transformers import AutoModel, AutoConfig
6
+
7
class IndoBERTClassifier(nn.Module):
    """Multi-task classifier: one shared encoder feeding two linear heads.

    The encoder is loaded with ``AutoModel.from_pretrained`` from
    ``config._name_or_path``.  The hidden state at sequence position 0 goes
    through dropout and then into two independent linear layers, one
    producing clickbait logits and one producing news-category ("kategori")
    logits.

    Args:
        config: Model configuration.  Must provide ``_name_or_path``,
            ``num_clickbait_labels`` and ``num_kategori_labels``; may
            provide ``classifier_dropout``.
    """

    # Dropout probability used when the config does not specify one.
    DEFAULT_DROPOUT = 0.1

    @staticmethod
    def _resolve_dropout(config):
        """Return the dropout probability to use for the classifier heads.

        ``classifier_dropout`` can be missing from the config or present but
        set to ``None`` (BERT-style configs default it to ``None``).  Passing
        ``None`` to ``nn.Dropout`` raises ``TypeError``, so both cases fall
        back to ``DEFAULT_DROPOUT``.  An explicit ``0.0`` is respected.
        """
        dropout_p = getattr(config, "classifier_dropout", None)
        if dropout_p is None:
            return IndoBERTClassifier.DEFAULT_DROPOUT
        return dropout_p

    def __init__(self, config):
        super().__init__()
        # The encoder's own config supplies hidden_size for the heads below.
        self.bert = AutoModel.from_pretrained(config._name_or_path, config=config)
        self.dropout = nn.Dropout(self._resolve_dropout(config))
        hidden_size = self.bert.config.hidden_size

        self.num_clickbait_labels = config.num_clickbait_labels
        self.num_kategori_labels = config.num_kategori_labels

        self.clickbait_classifier = nn.Linear(hidden_size, self.num_clickbait_labels)
        self.kategori_classifier = nn.Linear(hidden_size, self.num_kategori_labels)

    def forward(self, input_ids, attention_mask, clickbait_labels=None, kategori_labels=None, **kwargs):
        """Run the encoder and both classification heads.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Attention mask passed to the encoder.
            clickbait_labels: Accepted for interface compatibility; not used
                here (no loss is computed).
            kategori_labels: Accepted for interface compatibility; not used
                here (no loss is computed).
            **kwargs: Any further keyword arguments (for example
                ``token_type_ids``, if the tokenizer emits them) are accepted
                and ignored; they are not forwarded to the encoder.

        Returns:
            A dict with ``"clickbait_logits"`` and ``"kategori_logits"``.
        """
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state of the first token (the [CLS] position).
        pooled_output = output.last_hidden_state[:, 0, :]

        dropout_output = self.dropout(pooled_output)

        clickbait_logits = self.clickbait_classifier(dropout_output)
        kategori_logits = self.kategori_classifier(dropout_output)

        return {
            "clickbait_logits": clickbait_logits,
            "kategori_logits": kategori_logits,
        }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ scikit-learn
4
+ gradio