# training/tuning.py
# Hugging Face Hub file view: verified commit 61fe0b8 ("Create training/tuning.py")
# uploaded by noranisa, 849 bytes.
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
# Checkpoint id shared by the tokenizer loaded here and the classification
# model loaded further down in this script.
model_name = "indobenchmark/indobert-base-p1"

# The tokenizer is loaded from the same checkpoint id as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Read the CSV through the generic "csv" loader; the rows are accessed later
# through the "train" split.
# NOTE(review): the file is named eval_dataset.csv but is used as training
# data below -- confirm this is the intended file.
dataset = load_dataset(
    path="csv",
    data_files="data/eval_dataset.csv",
)
def preprocess(examples):
    """Tokenize the ``"text"`` column of a batch of rows.

    Intended as the callback for ``dataset.map(..., batched=True)``.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings of
            the current batch.

    Returns:
        The output of the module-level ``tokenizer`` for those strings, with
        truncation enabled and ``padding=True``. Per the tokenizer's
        documented behaviour, ``padding=True`` pads only up to the longest
        sequence of this call, so separate calls may produce different
        sequence lengths.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded
# Tokenize every split; batched=True passes preprocess a batch of rows per call
# instead of a single row.
dataset = dataset.map(function=preprocess, batched=True)

# Load the checkpoint as a sequence classifier configured for three labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)

# Hyperparameters for the run; checkpoints and logs go under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    weight_decay=0.01,
    learning_rate=2e-5,  # the hyperparameter this script is meant to tune
)
# Build the trainer and run fine-tuning on the "train" split.
#
# An explicit padding collator is required here. preprocess pads with
# padding=True inside a batched map(), so rows are only padded to the longest
# row of their own map batch. Without a collator (and with no tokenizer handed
# to Trainer) the default collator stacks rows as they are, which fails as
# soon as one training batch mixes rows of different lengths -- i.e. once the
# CSV is large enough to span more than one map batch.
# DataCollatorWithPadding re-pads every training batch to that batch's longest
# row, so the lengths always agree.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)

# Runs the training loop configured by training_args.
trainer.train()