Spaces:
Sleeping
Sleeping
| import torch | |
| import numpy as np | |
| from transformers import AutoTokenizer | |
| from model import CyberRoBERTa | |
# Output label names. predict() pairs them positionally with the model's
# per-label scores, so the order here must match the model's output order.
LABELS = [
    'HS', 'Abusive', 'HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race',
    'HS_Physical', 'HS_Gender', 'HS_Other', 'HS_Weak', 'HS_Moderate', 'HS_Strong',
]

# Use a CUDA device when one is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def load_model_and_thresholds(
    *,
    model_path="indoroberta_multilabel_model84%.bin",
    tokenizer_name="cahya/roberta-base-indonesian-522M",
    thresholds_path="optimal_thresholds.npy",
):
    """Load the classifier, its tokenizer, and the saved thresholds array.

    Called with no arguments this loads the same three artefacts as before;
    the keyword-only parameters exist so other checkpoints can be loaded
    without editing this function.

    Args:
        model_path: Path of the state-dict file passed to ``torch.load``.
        tokenizer_name: Name or path passed to
            ``AutoTokenizer.from_pretrained``.
        thresholds_path: Path of the ``.npy`` file passed to ``np.load``.

    Returns:
        A ``(model, tokenizer, thresholds)`` tuple. The model has been moved
        to ``DEVICE`` and switched to eval mode.
    """
    model = CyberRoBERTa()

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    thresholds = np.load(thresholds_path)
    return model, tokenizer, thresholds
def predict(text, model, tokenizer, thresholds):
    """Score ``text`` and return a mapping of label name to model score.

    Args:
        text: Input handed to ``tokenizer``.
        model: Callable invoked as ``model(input_ids, attention_mask)``.
        tokenizer: Callable whose result provides ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        thresholds: Kept for interface compatibility. The returned scores are
            raw model outputs; no thresholding is applied to them here.

    Returns:
        A dict pairing each entry of ``LABELS`` with the corresponding value
        of ``outputs[0]`` converted to a Python ``float``.
    """
    # Pad/truncate to a fixed 128-token window.
    encoding = tokenizer(
        text,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=128,
    )
    input_ids = encoding['input_ids'].to(DEVICE)
    attention_mask = encoding['attention_mask'].to(DEVICE)

    with torch.no_grad():
        outputs = model(input_ids, attention_mask)
        # NOTE(review): assumes outputs[0] is the per-label score vector for
        # a single input text, and that the model already maps scores to
        # [0, 1] -- confirm against CyberRoBERTa.forward.
        probs = outputs[0].cpu().numpy()

    # zip stops at the shorter sequence, so any scores beyond len(LABELS)
    # (or labels beyond the score count) are dropped without error.
    return {label: float(prob) for label, prob in zip(LABELS, probs)}