| | --- |
| | library_name: transformers |
| | tags: [] |
| | --- |
| | ``` |
| | import pandas as pd |
| | import re |
| | import nltk |
| | from nltk.corpus import stopwords |
| | from nltk.stem import WordNetLemmatizer |
| | from transformers import BertTokenizer, BertForSequenceClassification |
| | import torch |
| | from safetensors.torch import load_file |
| | |
def evaluate(test_data, batch_size=32):
    """Predict a binary label for every title in ``test_data``.

    Loads the tokenizer and sequence-classification weights published as
    ``CIS5190-PROJ/BERTv3``, runs the titles through the model in
    mini-batches, and returns ``1 - argmax(logits)`` for each title.

    Args:
        test_data: Object whose ``'title'`` entry supports ``.tolist()``
            (presumably a pandas DataFrame with a ``title`` column --
            confirm against the caller).
        batch_size: Number of titles per forward pass. Batching keeps peak
            memory bounded regardless of how many titles are evaluated.

    Returns:
        A NumPy int64 array with one entry per title, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    test_texts = test_data['title'].tolist()
    if not test_texts:
        # Nothing to classify: skip the model download/load entirely and
        # return an empty array with the same dtype as real predictions.
        return torch.empty(0, dtype=torch.long).numpy()

    tokenizer = BertTokenizer.from_pretrained("CIS5190-PROJ/BERTv3")
    model = BertForSequenceClassification.from_pretrained("CIS5190-PROJ/BERTv3")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    batch_predictions = []
    with torch.no_grad():
        for start in range(0, len(test_texts), batch_size):
            batch_texts = test_texts[start:start + batch_size]
            # Every title is padded/truncated to exactly 64 tokens, so the
            # per-title input is the same whatever the batch composition.
            encodings = tokenizer(
                batch_texts,
                truncation=True,
                padding="max_length",
                max_length=64,
                return_tensors="pt",
            )
            encodings = {key: val.to(device) for key, val in encodings.items()}
            logits = model(**encodings).logits
            # Move each batch's result to the CPU right away so device
            # memory is not held across iterations.
            batch_predictions.append(torch.argmax(logits, dim=1).cpu())

    predictions = torch.cat(batch_predictions).numpy()
    # Flip the predicted class index (0 <-> 1).
    # NOTE(review): presumably the model's label ids are the inverse of the
    # labels the caller expects -- confirm against the training setup.
    return 1 - predictions
| | ``` |