"""Fine-tune an IndoBERT sequence classifier on a local CSV dataset.

The script loads ``data/eval_dataset.csv``, tokenizes its ``text`` column,
and trains a 3-label classification head with the Hugging Face ``Trainer``.
Checkpoints are written under ``./results``.
"""

from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

model_name = "indobenchmark/indobert-base-p1"

# The "csv" loader places everything from ``data_files`` into a "train" split.
# NOTE(review): the file is named "eval_dataset.csv" but is used for training
# here -- confirm this is the intended file.
dataset = load_dataset("csv", data_files="data/eval_dataset.csv")

tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess(examples):
    """Tokenize the ``text`` column of a batch of examples.

    Padding is deliberately NOT applied here. With ``batched=True`` the map
    call processes the dataset chunk by chunk, so padding at this stage would
    pad each chunk to a different length and produce rows that cannot be
    stacked into one tensor once the trainer shuffles them together. Padding
    is done per training batch by the data collator instead.

    Args:
        examples: A batch from ``Dataset.map(batched=True)``; must contain a
            ``"text"`` key holding the raw strings.

    Returns:
        The tokenizer output for the batch, truncated to the tokenizer's
        maximum length.
    """
    return tokenizer(examples["text"], truncation=True)


dataset = dataset.map(preprocess, batched=True)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,  # tuning parameter
    per_device_train_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Pads every training batch to the longest sequence in that batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# NOTE(review): assumes the CSV has a "label" column for the Trainer to use
# as targets -- confirm against the data file.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    data_collator=data_collator,
)

trainer.train()