Spaces:

FikriRiyadi
/

indoroberta_multilabel

Sleeping

indoroberta_multilabel / predict_utils.py

Create predict_utils.py

26b8569 verified 8 months ago

1.22 kB

	import torch
	import numpy as np
	from transformers import AutoTokenizer
	from model import CyberRoBERTa

	# Label names, listed in the order in which predict() pairs them with the
	# model's output scores.
	LABELS = [
	    'HS',
	    'Abusive',
	    'HS_Individual',
	    'HS_Group',
	    'HS_Religion',
	    'HS_Race',
	    'HS_Physical',
	    'HS_Gender',
	    'HS_Other',
	    'HS_Weak',
	    'HS_Moderate',
	    'HS_Strong',
	]

	# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
	DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	def load_model_and_thresholds(
	    model_path="indoroberta_multilabel_model84%.bin",
	    tokenizer_name="cahya/roberta-base-indonesian-522M",
	    thresholds_path="optimal_thresholds.npy",
	):
	    """Load the classifier, its tokenizer and the saved thresholds.

	    All arguments are optional; the defaults reproduce the previously
	    hard-coded file names, so existing zero-argument calls are unchanged.

	    Args:
	        model_path: Path of the state-dict checkpoint passed to ``torch.load``.
	        tokenizer_name: Model id or path handed to
	            ``AutoTokenizer.from_pretrained``.
	        thresholds_path: Path of the ``.npy`` file read with ``np.load``
	            (presumably one threshold per entry of ``LABELS`` -- confirm).

	    Returns:
	        A ``(model, tokenizer, thresholds)`` tuple. The model has been moved
	        to ``DEVICE`` and put in evaluation mode.
	    """
	    model = CyberRoBERTa()
	    # NOTE(review): depending on the installed torch version and its
	    # ``weights_only`` default, torch.load may unpickle arbitrary objects;
	    # only point model_path at checkpoints from a trusted source.
	    state_dict = torch.load(model_path, map_location=DEVICE)
	    model.load_state_dict(state_dict)
	    model.to(DEVICE)
	    model.eval()

	    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
	    thresholds = np.load(thresholds_path)
	    return model, tokenizer, thresholds

	def predict(text, model, tokenizer, thresholds):
	    """Score ``text`` against every label and return the raw scores.

	    Args:
	        text: Input passed to ``tokenizer``. Only the first row of the model
	            output is used, so this is effectively a single text.
	        model: Callable invoked as ``model(input_ids, attention_mask)``.
	        tokenizer: Callable returning a mapping with ``'input_ids'`` and
	            ``'attention_mask'`` tensors.
	        thresholds: Accepted for interface compatibility but not used: the
	            returned values are the unthresholded scores, and callers that
	            need binary decisions must compare against thresholds themselves.

	    Returns:
	        A dict mapping each name in ``LABELS`` to its score as a Python
	        ``float`` (presumably a probability -- confirm that the model applies
	        a sigmoid to its outputs).
	    """
	    encoding = tokenizer(
	        text,
	        return_tensors='pt',
	        padding='max_length',
	        truncation=True,
	        max_length=128,
	    )
	    input_ids = encoding['input_ids'].to(DEVICE)
	    attention_mask = encoding['attention_mask'].to(DEVICE)

	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        outputs = model(input_ids, attention_mask)

	    # Take the first row of the output and move it to host memory.
	    probs = outputs[0].cpu().numpy()

	    return {label: float(prob) for label, prob in zip(LABELS, probs)}