Spaces:
Sleeping
Sleeping
| import os | |
| import torch | |
| from transformers import RobertaTokenizer, RobertaForSequenceClassification | |
| import torch.nn.functional as F | |
| from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN | |
# Point the huggingface_hub client at a custom endpoint (e.g. a mirror) when one
# is configured; deliberately overwrites any value already in the environment.
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
class BiasPredictor:
    """Binary bias classifier backed by a fine-tuned RoBERTa sequence-classification model.

    Loads the tokenizer and model once at construction time and keeps them on the
    best available device (CUDA if present, otherwise CPU). Class id 0 maps to
    "Not Biased", 1 to "Biased".
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer + model from ``model_dir`` and prepare them for inference.

        Args:
            model_dir: Directory (or hub id) holding the fine-tuned model/tokenizer.
            base_model_name: Currently unused; kept for backward compatibility
                with existing callers.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()
        # Debug aid: print classifier-head stats so a maintainer can spot a head
        # that was randomly re-initialized instead of loaded from the checkpoint.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")
        self.label_map = {
            0: "Not Biased",
            1: "Biased"
        }

    def predict(self, text):
        """Classify a single text.

        Delegates to :meth:`predict_batch` so the tokenize/softmax/label logic
        lives in exactly one place.

        Returns:
            dict with keys ``text``, ``class_id``, ``label``, ``confidence``,
            ``probabilities``.
        """
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify a batch of texts in a single forward pass.

        Args:
            texts: Input strings; truncated/padded to 128 tokens.

        Returns:
            One result dict per input text (same keys as :meth:`predict`).
            An empty input list yields an empty result list (the tokenizer
            would otherwise raise on an empty batch).
        """
        if not texts:
            return []
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = F.softmax(logits, dim=-1)
        results = []
        for row, text in zip(probs, texts):
            predicted_class_id = row.argmax().item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": row[predicted_class_id].item(),
                "probabilities": row.tolist(),
            })
        return results
if __name__ == "__main__":
    # Smoke test: run the same four sentences through the batch API and the
    # single-text API; both paths should agree.
    predictor = BiasPredictor()
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda."
    ]

    print("\n--- BATCH TEST ---")
    results = predictor.predict_batch(texts)
    for r in results:
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    # Reuse the same list instead of duplicating the literals.
    print("\n ------- Single pass test for each text separately ----------")
    for text in texts:
        r = predictor.predict(text)
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")