from typing import Dict, List

import numpy as np
import torch
from huggingface_hub import hf_hub_download
from sklearn.preprocessing import LabelEncoder
from transformers import BertForSequenceClassification, BertTokenizer


class CustomBertClassifier:
    def __init__(self):
        # Load the fine-tuned model and tokenizer from the local directory
        self.model = BertForSequenceClassification.from_pretrained(".")
        self.tokenizer = BertTokenizer.from_pretrained(".")
        self.model.eval()

        # Download the saved label classes and restore the LabelEncoder
        label_path = hf_hub_download(
            repo_id="JaySenpai/bert-model",
            filename="label_classes.npy",
        )
        self.le = LabelEncoder()
        self.le.classes_ = np.load(label_path, allow_pickle=True)

    def __call__(self, inputs: str) -> List[Dict]:
        # Tokenize the input text (use a separate name so the raw
        # string argument is not shadowed by the encoded tensors)
        encoded = self.tokenizer(
            inputs, return_tensors="pt", truncation=True, padding=True
        )

        # Run inference without tracking gradients
        with torch.no_grad():
            outputs = self.model(**encoded)

        # Convert logits to class probabilities
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        probs = probs[0].tolist()

        # Map each probability to its human-readable label; cast numpy
        # strings to plain str so the output is JSON-serializable
        results = [
            {"label": str(self.le.classes_[i]), "score": round(prob, 4)}
            for i, prob in enumerate(probs)
        ]

        # Sort by score, highest first
        results.sort(key=lambda x: x["score"], reverse=True)
        return results
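

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original handler: the sample
    # sentence and the top-3 cutoff below are illustrative assumptions.
    # Assumes the model and tokenizer files sit in the current working
    # directory, as __init__ expects.
    classifier = CustomBertClassifier()
    predictions = classifier("This is an example sentence to classify.")
    for p in predictions[:3]:  # print the three highest-scoring labels
        print(f"{p['label']}: {p['score']:.4f}")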