noranisa committed on
Commit
61fe0b8
·
verified ·
1 Parent(s): 8efad72

Create training/tuning.py

Browse files
Files changed (1) hide show
  1. training/tuning.py +31 -0
training/tuning.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune a pretrained IndoBERT checkpoint for 3-class sequence classification.

Running this module loads a local CSV file, tokenizes its ``text`` column,
and immediately starts training; checkpoints are written to ``./results``.
"""
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

model_name = "indobenchmark/indobert-base-p1"

# NOTE(review): the training split is read from a file named
# "eval_dataset.csv" -- confirm this is the intended training data.
# Assumes the CSV has a "text" column (read below) and a label column the
# Trainer can use to compute the loss -- TODO confirm against the file.
dataset = load_dataset("csv", data_files="data/eval_dataset.csv")

tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess(examples):
    """Tokenize the ``text`` column of a batch of rows.

    Padding is deliberately NOT applied here. With ``batched=True`` the
    ``map`` call feeds rows in chunks, so padding at this stage would pad
    each chunk to its own longest row and leave rows from different chunks
    with different lengths. Padding is done per training batch by the data
    collator passed to the ``Trainer`` instead.

    Args:
        examples: A batch of rows as a dict of column name -> list of values.

    Returns:
        The tokenizer output for the batch (truncated, unpadded).
    """
    return tokenizer(examples["text"], truncation=True)


dataset = dataset.map(preprocess, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,  # tuning parameter
    per_device_train_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    # Pad each training batch to its own longest sequence so that rows of
    # different tokenized lengths can be stacked into one tensor.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

trainer.train()