from preprocess_data import remove_html,remove_links
import torch
from model import BERT_Arch
from transformers import AutoModel,BertTokenizerFast
import numpy as np
def evaluate(data, device, model_path="/content/model.pth"):
    """Classify one raw text sample with the fine-tuned BERT model.

    Parameters
    ----------
    data : str
        Raw text to classify; HTML tags and links are stripped before
        tokenization (same preprocessing as training).
    device : torch.device or str
        Device on which to run inference.
    model_path : str, optional
        Path to the saved ``state_dict``. Defaults to ``/content/model.pth``
        (the original hard-coded location, kept for backward compatibility).

    Returns
    -------
    numpy.ndarray
        Array of shape (1,) holding the argmax class index of the
        model's output logits.
    """
    bert = AutoModel.from_pretrained('bert-base-uncased')
    # Freeze the BERT backbone — only the classification head was trained.
    for param in bert.parameters():
        param.requires_grad = False

    model = BERT_Arch(bert)
    # Map CUDA-saved weights onto CPU when no GPU is available.
    map_location = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(
        torch.load(model_path, weights_only=True, map_location=map_location)
    )
    model = model.to(device)
    # Bug fix: without eval(), dropout inside BERT_Arch stays active and
    # predictions become nondeterministic at inference time.
    model.eval()

    # Clean the input the same way the training data was preprocessed.
    texts = [remove_links(remove_html(data))]

    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
    tokenized = tokenizer.batch_encode_plus(
        texts,
        max_length=25,
        padding='max_length',  # replaces deprecated pad_to_max_length=True
        truncation=True,
    )
    input_ids = torch.tensor(tokenized['input_ids'])
    attention_mask = torch.tensor(tokenized['attention_mask'])

    with torch.no_grad():
        preds = model(input_ids.to(device), attention_mask.to(device))
    # Already gradient-free under no_grad; detach() was redundant.
    preds = preds.cpu().numpy()
    return np.argmax(preds, axis=1)