File size: 1,081 Bytes
5661ef4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import torch

from transformers import BertForSequenceClassification, BertTokenizer, DataCollatorForTokenClassification
import numpy as np
# Module-level default device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class BERT():
    """Wrapper around a pre-trained BERT model for a 13-class sequence task.

    Loads the ``bert-base-uncased`` checkpoint and tokenizer, replaces the
    classification head with a fresh 13-way linear layer, and exposes the
    pieces through small accessor methods.
    """

    def __init__(self):
        # Number of target classes for the classification head.
        self.num_classes = 13
        # Prefer GPU when available; all model weights must live on this device.
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        # Load pre-trained model and tokenizer.
        self.model = BertForSequenceClassification.from_pretrained("bert-base-uncased").to(self.device)
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        # Replace the output layer to match the number of classes.
        # BUG FIX: the new head must also be moved to self.device — the
        # original left it on the CPU while the rest of the model was on
        # GPU, causing a device-mismatch error in forward() whenever CUDA
        # is available.
        self.model.classifier = torch.nn.Linear(
            in_features=768, out_features=self.num_classes
        ).to(self.device)
        # NOTE(review): a token-classification collator paired with a
        # sequence-classification model looks inconsistent — confirm intent.
        self.data_collator = DataCollatorForTokenClassification(self.tokenizer)

    def getModel(self):
        """Return the underlying BertForSequenceClassification model."""
        return self.model

    def get_tokenizer(self):
        """Return the BertTokenizer instance."""
        return self.tokenizer

    def tokenize(self, txt):
        """Tokenize *txt* and return the encoding as PyTorch tensors."""
        return self.tokenizer(txt, return_tensors='pt')