Spaces:
Sleeping
Sleeping
| import os | |
| import torch | |
| from transformers import RobertaTokenizer, RobertaForSequenceClassification | |
| import torch.nn.functional as F | |
| from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN | |
# Point the huggingface_hub client at a custom endpoint (e.g. a mirror) when one
# is configured; deliberately overwrites any value already in the environment.
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
class BiasPredictor:
    """Binary bias classifier backed by a fine-tuned RoBERTa sequence-classification model.

    Loads the tokenizer and model once at construction time and keeps them on the
    best available device (CUDA if present, otherwise CPU). Class id 0 maps to
    "Not Biased", 1 to "Biased".
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer + model from ``model_dir`` and prepare them for inference.

        Args:
            model_dir: Directory (or hub id) holding the fine-tuned model/tokenizer.
            base_model_name: Currently unused; kept for backward compatibility
                with existing callers.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()
        # Debug aid: print classifier-head stats so a maintainer can spot a head
        # that was randomly re-initialized instead of loaded from the checkpoint.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")
        self.label_map = {
            0: "Not Biased",
            1: "Biased"
        }

    def predict(self, text):
        """Classify a single text.

        Delegates to :meth:`predict_batch` so the tokenize/softmax/label logic
        lives in exactly one place.

        Returns:
            dict with keys ``text``, ``class_id``, ``label``, ``confidence``,
            ``probabilities``.
        """
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify a batch of texts in a single forward pass.

        Args:
            texts: Input strings; truncated/padded to 128 tokens.

        Returns:
            One result dict per input text (same keys as :meth:`predict`).
            An empty input list yields an empty result list (the tokenizer
            would otherwise raise on an empty batch).
        """
        if not texts:
            return []
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = F.softmax(logits, dim=-1)
        results = []
        for row, text in zip(probs, texts):
            predicted_class_id = row.argmax().item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": row[predicted_class_id].item(),
                "probabilities": row.tolist(),
            })
        return results
if __name__ == "__main__":
    # Smoke test: run the same four sentences through the batch API and the
    # single-text API; both paths should agree.
    predictor = BiasPredictor()
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda."
    ]

    print("\n--- BATCH TEST ---")
    results = predictor.predict_batch(texts)
    for r in results:
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    # Reuse the same list instead of duplicating the literals.
    print("\n ------- Single pass test for each text separately ----------")
    for text in texts:
        r = predictor.predict(text)
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")