# newslens/src/models/test_inference.py
# Provenance (from Hugging Face Spaces): commit 208266a,
# "Add NewsLens Streamlit app" by Jitender20.
import os

import torch
import torch.nn.functional as F
from transformers import RobertaForSequenceClassification, RobertaTokenizer

from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN

# Redirect Hugging Face Hub traffic to a custom endpoint (e.g. a mirror)
# when one is configured; must be set before any hub download happens.
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
class BiasPredictor:
    """Binary bias classifier backed by a fine-tuned RoBERTa model.

    The tokenizer and model are loaded once at construction time so that
    repeated calls to :meth:`predict` / :meth:`predict_batch` do not pay
    the model-load cost again.
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer + model from *model_dir* and move the model to GPU if available.

        NOTE(review): ``base_model_name`` is accepted but never used — kept
        only for backward compatibility with existing callers.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()

        # Debug sanity check: print the classifier-head parameters so a
        # freshly (randomly) initialized head is easy to spot in the logs.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        # Maps the model's class index to a human-readable label.
        self.label_map = {
            0: "Not Biased",
            1: "Biased",
        }

    def predict(self, text: str) -> dict:
        """Classify a single text.

        Delegates to :meth:`predict_batch` with a one-element batch — the
        tokenizer arguments and forward pass are identical, so the result
        matches the previous standalone implementation while removing the
        duplicated pipeline code.

        Returns a dict with keys ``text``, ``class_id``, ``label``,
        ``confidence`` and ``probabilities``.
        """
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify a batch of texts in a single forward pass.

        Args:
            texts: texts to classify; each is truncated to 128 tokens.

        Returns:
            One result dict per input text, in input order, each with keys
            ``text``, ``class_id``, ``label``, ``confidence`` and
            ``probabilities`` (per-class softmax scores).
        """
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True,
        ).to(self.device)

        # Inference only — disable autograd bookkeeping.
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = F.softmax(logits, dim=-1)

        results = []
        for i, text in enumerate(texts):
            predicted_class_id = probs[i].argmax().item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                # .get with "Unknown" guards against an out-of-range head.
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": probs[i][predicted_class_id].item(),
                "probabilities": probs[i].tolist(),
            })
        return results
if __name__ == "__main__":
    predictor = BiasPredictor()

    # Two loaded/biased and two neutral example sentences.
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda.",
    ]

    print("\n--- BATCH TEST ---")
    for r in predictor.predict_batch(texts):
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    # Sanity check: the single-text path should agree with the batch path.
    # Reuses `texts` instead of repeating the same hard-coded list.
    print("\n ------- Single pass test for each text separately ----------")
    for text in texts:
        r = predictor.predict(text)
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")