import os
from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN

# huggingface_hub reads HF_ENDPOINT when it is first imported, so a custom
# endpoint (e.g. a mirror) must be placed in the environment before
# transformers is imported below, or it will have no effect.
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT

import torch
import torch.nn.functional as F
from transformers import RobertaTokenizer, RobertaForSequenceClassification

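# `src.config` is not shown in this file; a minimal sketch of what it is
# assumed to provide (names match the import above, values are illustrative
# only, not the project's actual configuration):
#
#     BIAS_MODEL_PATH = Path("models/bias-roberta")  # fine-tuned checkpoint dir or Hub repo id
#     HF_ENDPOINT = os.environ.get("HF_ENDPOINT")    # optional custom Hub endpoint/mirror
#     HF_TOKEN = os.environ.get("HF_TOKEN")          # optional token for private models
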
class BiasPredictor:
    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()

print("\n--- CLASSIFIER PARAM CHECK ---")
for name, param in self.model.named_parameters():
if "classifier" in name:
print(name, param.requires_grad, param.data.mean().item())
print("--- END CHECK ---\n")
self.label_map = {
0: "Not Biased",
1: "Biased"
}
    def predict(self, text):
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True,
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)

        logits = outputs.logits
        probs = F.softmax(logits, dim=-1)
        predicted_class_id = probs.argmax().item()
        confidence = probs[0][predicted_class_id].item()

        return {
            "text": text,
            "class_id": predicted_class_id,
            "label": self.label_map.get(predicted_class_id, "Unknown"),
            "confidence": confidence,
            "probabilities": probs[0].tolist(),
        }

    def predict_batch(self, texts: list[str]) -> list[dict]:
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True,
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)

        logits = outputs.logits
        probs = F.softmax(logits, dim=-1)

        results = []
        for i, text in enumerate(texts):
            predicted_class_id = probs[i].argmax().item()
            confidence = probs[i][predicted_class_id].item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": confidence,
                "probabilities": probs[i].tolist(),
            })
        return results
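
# For long input lists, callers may want to bound peak memory by chunking
# before calling predict_batch. A minimal sketch under that assumption
# (hypothetical helper, not part of the original module; the chunk size of 32
# is an arbitrary illustrative default):
def predict_in_chunks(predictor: BiasPredictor, texts: list[str], chunk_size: int = 32) -> list[dict]:
    results: list[dict] = []
    for start in range(0, len(texts), chunk_size):
        # Each slice is tokenized and scored as one padded batch.
        results.extend(predictor.predict_batch(texts[start:start + chunk_size]))
    return results
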
# Smoke test: run from the project root so that `src.config` is importable.
if __name__ == "__main__":
    predictor = BiasPredictor()
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda.",
    ]

    print("\n--- BATCH TEST ---")
    results = predictor.predict_batch(texts)
    for r in results:
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    print("\n--- SINGLE-PASS TEST (each text separately) ---")
    for text in texts:
        r = predictor.predict(text)
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")