"""Bias classification over text using a fine-tuned RoBERTa model."""

import os

import torch
import torch.nn.functional as F
from transformers import RobertaTokenizer, RobertaForSequenceClassification

from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN

# Optionally redirect Hugging Face hub traffic (e.g. to a mirror/proxy).
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT


class BiasPredictor:
    """Wraps a fine-tuned RoBERTa sequence classifier for bias detection.

    The model and tokenizer are loaded once in ``__init__`` and reused for
    every subsequent prediction call.
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer and model from *model_dir* onto GPU if available.

        NOTE(review): ``base_model_name`` is currently unused; it is kept in
        the signature for backward compatibility with existing callers.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(
            str(model_dir), token=HF_TOKEN
        )
        self.model.to(self.device)
        self.model.eval()

        # Debug aid: print classifier-head parameter stats so a randomly
        # re-initialized head (checkpoint mismatch) is visible at startup.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        # Maps class index -> human-readable label.
        self.label_map = {
            0: "Not Biased",
            1: "Biased",
        }

    def predict(self, text: str) -> dict:
        """Classify a single text.

        Returns the same dict shape as ``predict_batch`` produces for one
        element: keys ``text``, ``class_id``, ``label``, ``confidence``,
        ``probabilities``.
        """
        # Delegate to the batched path so the two code paths cannot drift
        # (the original duplicated the tokenize/forward/softmax logic here).
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify a batch of texts in a single forward pass.

        Args:
            texts: input strings; each is truncated/padded to 128 tokens.

        Returns:
            One dict per input with keys ``text``, ``class_id``, ``label``,
            ``confidence`` and ``probabilities`` (per-class softmax scores).
        """
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True,
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = F.softmax(logits, dim=-1)

        results = []
        for i, text in enumerate(texts):
            class_id = probs[i].argmax().item()
            results.append({
                "text": text,
                "class_id": class_id,
                "label": self.label_map.get(class_id, "Unknown"),
                "confidence": probs[i][class_id].item(),
                "probabilities": probs[i].tolist(),
            })
        return results


if __name__ == "__main__":
    predictor = BiasPredictor()
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda.",
    ]

    print("\n--- BATCH TEST ---")
    for r in predictor.predict_batch(texts):
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    # Fixed typo ("seprately") and reuse `texts` instead of the original's
    # duplicated literal list.
    print("\n ------- Single pass test for each text separately ----------")
    for text in texts:
        r = predictor.predict(text)
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")