# HW3 / train.py — fine-tune bert-base-uncased for binary sentiment classification.
# Author: ArnaudVH — "Final Train and Test files" (commit dea7034)
import torch
from transformers import (
DataCollatorWithPadding,
AutoModelForSequenceClassification,
AutoTokenizer,
TrainingArguments,
Trainer,
)
from split_data import make_train_data
# Select the compute device: prefer a CUDA GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Label <-> id mappings for the two sentiment classes; label2id is
# derived from id2label so the two can never drift apart.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: idx for idx, name in id2label.items()}

# Tokeniser matching the pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Binary sequence-classification head on top of BERT. Dropout on both the
# hidden states and the attention probabilities is raised to 0.3 as extra
# regularisation against overfitting during fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    hidden_dropout_prob=0.3,
    attention_probs_dropout_prob=0.3,
)
model = model.to(device)
# Preprocessing function
def tokenize_func(batch):
    """Tokenise the "text" field of a batch, truncating to the model's max length.

    Designed for `Dataset.map(..., batched=True)`: receives a dict of columns
    and returns the tokenizer's encoding dict (input_ids, attention_mask, ...).
    """
    encoded = tokenizer(batch["text"], truncation=True)
    return encoded
# Load the train/validation split and tokenise both sides in batches.
train_data, validation_data = make_train_data()
tokenized_train_data = train_data.map(tokenize_func, batched=True)
tokenized_validation_data = validation_data.map(tokenize_func, batched=True)

# Dynamically pad each batch to its longest sequence at collation time.
data_collator = DataCollatorWithPadding(tokenizer)

# Derive the logging/eval/save cadence from the dataset size.
# BUG FIX: the step count previously divided by 16, but the actual
# per-device train batch size is 32 (see TrainingArguments below), so the
# schedule fired roughly twice as often as intended. The max(1, ...)
# guards also prevent a zero step count on small datasets, which
# TrainingArguments rejects for "steps" strategies.
steps_per_epoch = max(1, len(tokenized_train_data) // 32)
logging_steps = max(1, steps_per_epoch // 25)  # ~25 log/eval/save points per epoch
# Training arguments
# Hyper-parameters and schedule for the Trainer.
training_args = TrainingArguments(
    # Output locations: checkpoints/final model, and TensorBoard logs.
    output_dir='./finetuned',
    logging_dir='./logs',
    # Optimisation hyper-parameters.
    learning_rate=1.0e-5,
    weight_decay=0.01,  # decoupled weight decay as regularisation
    per_device_train_batch_size=32,
    num_train_epochs=2,
    # Mixed precision only when a CUDA GPU is present.
    fp16=torch.cuda.is_available(),
    # Log, evaluate and checkpoint on the same step cadence.
    logging_steps=logging_steps,
    eval_strategy="steps",
    eval_steps=logging_steps,
    save_strategy="steps",
    save_steps=logging_steps,
    save_total_limit=2,  # keep only the two most recent checkpoints on disk
)
# The Trainer drives the optimisation loop, periodic evaluation and
# checkpointing according to training_args.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_validation_data,
    data_collator=data_collator,
)

# Fine-tune, then persist the final model (to args.output_dir) and the
# matching tokenizer so the checkpoint can be reloaded standalone.
trainer.train()
trainer.save_model()
tokenizer.save_pretrained('./finetuned')