Kalaphant committed on
Commit
87d2a20
·
verified ·
1 Parent(s): 045829b

Create train.py

Browse files
Files changed (1) hide show
  1. train.py +40 -0
train.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune ``bert-base-uncased`` on the ``question`` column of ``dataset.csv``.

The script loads the CSV, tokenizes the ``question`` text, and runs the
Hugging Face ``Trainer`` for three epochs, evaluating once per epoch and
writing its output under ``./results``.
"""
from datasets import load_dataset
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import BertTokenizer

# Load the dataset
dataset = load_dataset('csv', data_files='dataset.csv')

# A single file passed through ``data_files`` is exposed as a lone "train"
# split, so the 'test' split used for evaluation below would raise KeyError.
# Hold out a fixed, reproducible 20% of the rows for evaluation instead.
if 'test' not in dataset:
    dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of rows using their ``question`` text.

    Args:
        examples: A batch of dataset rows, as passed by ``Dataset.map`` with
            ``batched=True``; must contain a ``question`` column.

    Returns:
        The tokenizer output for the batch, padded to the model's maximum
        length and truncated where longer.
    """
    return tokenizer(examples['question'], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Load the model
# NOTE(review): num_labels=1 configures the head for regression (a single
# output trained with a mean-squared-error loss) - confirm that is intended
# rather than multi-class classification.
# NOTE(review): presumably dataset.csv also carries a label column the Trainer
# can pick up (e.g. "label"); verify against the CSV.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)

# Define training arguments
# NOTE(review): recent transformers releases rename ``evaluation_strategy`` to
# ``eval_strategy``; kept as-is for compatibility with the version this script
# was written against - confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
)

# Train the model
trainer.train()