Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments | |
| import numpy as np | |
| from sklearn.metrics import accuracy_score, precision_recall_fscore_support | |
# Hugging Face checkpoint id; the tokenizer and the classifier are both
# loaded from this name further down in the script.
model_name = "indobenchmark/indobert-base-p1"

# Load the labelled CSV. The script later reads the rows through
# dataset["train"], the only split it ever accesses.
dataset = load_dataset(
    "csv",
    data_files="data/eval_dataset.csv",
)
# Sentiment string -> integer class id used as the training target.
label_map = {"negative": 0, "neutral": 1, "positive": 2}


def encode_label(example):
    """Replace the string sentiment in ``example["label"]`` with its class id.

    String labels are matched case-insensitively and with surrounding
    whitespace ignored, so ``"Positive"`` and ``" positive "`` both map to
    the id of ``"positive"``.

    Args:
        example: A mapping with a ``"label"`` entry. It is modified in place.

    Returns:
        The same ``example`` object, with ``"label"`` set to the integer id.

    Raises:
        KeyError: If ``"label"`` is missing or is not a key of ``label_map``
            after normalisation.
    """
    raw = example["label"]
    # Only strings can be normalised; anything else is looked up as-is.
    key = raw.strip().lower() if isinstance(raw, str) else raw
    if key not in label_map:
        raise KeyError(
            f"unknown label {raw!r}; expected one of {sorted(label_map)}"
        )
    example["label"] = label_map[key]
    return example
# Convert every row's string label to its integer class id.
dataset = dataset.map(function=encode_label)

# Tokenizer that belongs to the same checkpoint as the classifier.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)
def preprocess(example):
    """Tokenize the ``"text"`` field of one example.

    Args:
        example: A mapping with a ``"text"`` entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text, with
        truncation and padding both requested.
    """
    # NOTE(review): this function is mapped without batched=True, so the
    # tokenizer sees one text per call and padding=True (pad to the longest
    # item in the call) presumably adds nothing; equal-length batches would
    # then have to come from the data collator -- confirm.
    text = example["text"]
    encoded = tokenizer(text, truncation=True, padding=True)
    return encoded
# Add the tokenizer outputs to every row.
dataset = dataset.map(preprocess)

# Size the classification head from label_map instead of a hard-coded 3, and
# store the label names in the model config so the saved model reports
# "negative"/"neutral"/"positive" rather than LABEL_0/1/2 at inference time.
id2label = {idx: name for name, idx in label_map.items()}
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_map),
    id2label=id2label,
    label2id=dict(label_map),
)
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        A dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``.
    """
    scores, references = eval_pred
    # Predicted class = index of the largest score along axis 1
    # (assumes scores is 2-D: one row per example -- TODO confirm).
    predicted = np.argmax(scores, axis=1)
    # Tuple layout: (precision, recall, f1, support); support is not reported.
    weighted = precision_recall_fscore_support(
        references, predicted, average="weighted"
    )
    return {
        "accuracy": accuracy_score(references, predicted),
        "f1": weighted[2],
        "precision": weighted[0],
        "recall": weighted[1],
    }
# Local import: the collator is only needed here and is not part of the
# file-level transformers import.
from transformers import DataCollatorWithPadding

training_args = TrainingArguments(
    output_dir="./model",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    num_train_epochs=3,
    evaluation_strategy="epoch",
)

# Hold out 20% of the rows for evaluation. Evaluating on the rows the model
# was trained on reports metrics that say nothing about generalisation.
# The split reuses the training seed so reruns get the same partition.
split = dataset["train"].train_test_split(
    test_size=0.2,
    seed=training_args.seed,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split["train"],
    eval_dataset=split["test"],
    # Pad each batch to its longest sequence. Without a tokenizer or collator
    # the Trainer's default collator cannot stack token lists of different
    # lengths into one tensor.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()

# Save the fine-tuned model and its tokenizer side by side so the directory
# can be loaded as a complete checkpoint.
trainer.save_model("model/final_model")
tokenizer.save_pretrained("model/final_model")