Ulys5e committed on
Commit
f615f2f
·
verified ·
1 Parent(s): 9c9411d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +65 -3
README.md CHANGED
@@ -1,3 +1,65 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ pip install transformers datasets torch scikit-learn
5
+ import torch
6
+ from datasets import load_dataset
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.metrics import accuracy_score, f1_score
10
def load_and_prepare_data():
    """Load the "emotion" dataset and return its train and test splits.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple taken from the ``"train"``
        and ``"test"`` entries of the object returned by ``load_dataset``.
    """
    splits = load_dataset("emotion")
    return splits["train"], splits["test"]
15
def tokenize_dataset(dataset):
    """Tokenize the ``"text"`` column of *dataset* with the DistilBERT tokenizer.

    Args:
        dataset: An object exposing ``map(function, batched=True)`` whose
            batches contain a ``"text"`` entry.

    Returns:
        Whatever ``dataset.map`` returns after applying the tokenizer to
        every batch.
    """
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    def encode_batch(batch):
        # Pad to the maximum length and truncate longer inputs so every
        # example comes out with the same sequence length.
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    return dataset.map(encode_batch, batched=True)
21
def load_model():
    """Instantiate a DistilBERT sequence-classification model with six labels.

    Returns:
        The model produced by
        ``AutoModelForSequenceClassification.from_pretrained`` for the
        ``"distilbert-base-uncased"`` checkpoint.
    """
    # NOTE(review): 6 presumably matches the number of classes in the
    # dataset used by this script -- confirm if the dataset changes.
    label_count = 6
    checkpoint = "distilbert-base-uncased"
    return AutoModelForSequenceClassification.from_pretrained(
        checkpoint, num_labels=label_count
    )
25
def define_training_arguments():
    """Build the ``TrainingArguments`` used to fine-tune the classifier.

    Evaluation and checkpointing both happen once per epoch, and the
    checkpoint with the highest ``"accuracy"`` metric is reloaded at the end
    of training.

    Returns:
        A configured ``TrainingArguments`` instance.
    """
    # Local import: only needed for the compatibility probe below.
    import inspect

    # Newer transformers releases renamed ``evaluation_strategy`` to
    # ``eval_strategy`` and eventually dropped the old spelling, which makes
    # a hard-coded keyword raise ``TypeError`` on one side or the other.
    # Pick whichever name the installed version actually accepts.
    accepted = inspect.signature(TrainingArguments.__init__).parameters
    eval_strategy_key = (
        "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"
    )

    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        # Must match the evaluation strategy for load_best_model_at_end.
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        **{eval_strategy_key: "epoch"},
    )
    return training_args
42
def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 from an evaluation prediction pair.

    Args:
        eval_pred: An iterable that unpacks into ``(logits, labels)``.

    Returns:
        A dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted = torch.tensor(logits).argmax(dim=-1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "f1": f1_score(labels, predicted, average="weighted"),
    }
48
def main():
    """Run the full pipeline: load data, tokenize, train, evaluate, save."""
    raw_train, raw_test = load_and_prepare_data()

    # Tokenize the train split first, then the test split.
    encoded_train = tokenize_dataset(raw_train)
    encoded_test = tokenize_dataset(raw_test)

    trainer = Trainer(
        model=load_model(),
        args=define_training_arguments(),
        train_dataset=encoded_train,
        eval_dataset=encoded_test,
        compute_metrics=compute_metrics,
    )

    # Train, run a final evaluation pass, then persist the model.
    trainer.train()
    trainer.evaluate()
    trainer.save_model()


# Only start training when executed as a script, not when imported.
if __name__ == "__main__":
    main()