"""Evaluate the fine-tuned sentiment classifier on the test split.

Loads the model and tokenizer stored in ``./finetuned``, classifies every
text returned by ``make_test_data()``, and prints a 2x2 confusion matrix
comparing the predictions with the reference labels.
"""
import torch
from torchmetrics.classification import MulticlassConfusionMatrix
from transformers import AutoTokenizer, pipeline

from split_data import make_test_data

# Single source of truth for translating the pipeline's string labels into
# class ids. Both historical names are kept bound to the same mapping.
label2id = {
    "NEGATIVE": 0,
    "POSITIVE": 1,
}
label_to_id = label2id

tokenizer = AutoTokenizer.from_pretrained("./finetuned")
classifier = pipeline(
    "sentiment-analysis",
    model="./finetuned",
    tokenizer=tokenizer,
    max_length=512,
    truncation=True,
)

test_data = make_test_data()
# NOTE(review): assumes test_data["label"] already holds the integer ids used
# in label2id (0 = NEGATIVE, 1 = POSITIVE) -- confirm against split_data.
true_labels = test_data["label"]
# Materialise the texts as a plain list so the pipeline is always handed the
# list-of-strings input form, whatever container make_test_data() returns.
texts = list(test_data["text"])

# Run inference exactly once over the whole test set; the predictions below
# are derived from this result instead of re-classifying each text.
results = classifier(texts)

# A label outside label_to_id (for example a model whose config still emits
# "LABEL_0"/"LABEL_1") raises KeyError here rather than being miscounted.
predicted_labels = [label_to_id[result["label"]] for result in results]

predicted_tensor = torch.tensor(predicted_labels)
true_tensor = torch.tensor(true_labels)

# The metric is called as (preds, target).
# NOTE(review): per the torchmetrics docs, rows should be the true class and
# columns the predicted class -- confirm for the installed version.
confusion_matrix = MulticlassConfusionMatrix(num_classes=2)(predicted_tensor, true_tensor)

print("Confusion Matrix")
print(confusion_matrix)