import torch
from transformers import BertForSequenceClassification, BertTokenizer, DataCollatorForTokenClassification
import numpy as np

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class BERT:
    """Wraps a pre-trained BERT model and tokenizer for sequence classification."""

    def __init__(self):
        self.num_classes = 13
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        # Load the pre-trained model and tokenizer.
        self.model = BertForSequenceClassification.from_pretrained("bert-base-uncased").to(self.device)
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # Replace the output layer to match the number of classes and move it
        # onto the same device as the rest of the model.
        self.model.classifier = torch.nn.Linear(in_features=768, out_features=self.num_classes).to(self.device)
        self.data_collator = DataCollatorForTokenClassification(self.tokenizer)

    def getModel(self):
        return self.model

    def get_tokenizer(self):
        return self.tokenizer

    def tokenize(self, txt):
        # Tokenize raw text and return PyTorch tensors.
        return self.tokenizer(txt, return_tensors="pt")
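
# A minimal usage sketch: instantiate the wrapper, tokenize a sentence, and run
# a single forward pass. The example text and the argmax decoding are
# illustrative assumptions.
if __name__ == "__main__":
    bert = BERT()
    model = bert.getModel()

    # Tokenize a sample sentence and move the tensors to the model's device.
    inputs = bert.tokenize("The service was quick and the staff were friendly.")
    inputs = {k: v.to(bert.device) for k, v in inputs.items()}

    # Forward pass without gradient tracking; logits have shape (1, num_classes).
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    print("Predicted class id:", int(torch.argmax(logits, dim=-1)))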