Spaces:

Jitender20
/

newslens

Sleeping

File size: 3,820 Bytes

208266a

import os
import torch 
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch.nn.functional as F
from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN

if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT

class BiasPredictor:
    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        self.label_map = {
            0: "Not Biased",
            1: "Biased"
        }

    def predict(self, text):
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits

            probs = F.softmax(logits, dim=-1)
            predicted_class_id = probs.argmax().item()
            confidence = probs[0][predicted_class_id].item()

        return {
            "text": text,
            "class_id": predicted_class_id,
            "label": self.label_map.get(predicted_class_id, "Unknown"),
            "confidence": confidence,
            "probabilities": probs[0].tolist()
        }
    
    def predict_batch(self, texts: list[str]) -> list[dict]:
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits
            probs = F.softmax(logits, dim=-1)

        results = []
        for i, text in enumerate(texts):
            predicted_class_id = probs[i].argmax().item()
            confidence = probs[i][predicted_class_id].item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": confidence,
                "probabilities": probs[i].tolist()
            })

        return results
        
if __name__ == "__main__":
    predictor = BiasPredictor()

    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda."
    ]

    print("\n--- BATCH TEST ---")
    results = predictor.predict_batch(texts)
    for r in results:
        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

    print("\n ------- Single pass test for each text seprately ----------")
    for text in [
            "The government brutally crushed the peaceful protesters.",
            "The government deployed police officers to the protest site.",
            "Scientists warn of accelerating climate change impacts.",
            "Climate alarmists continue pushing their radical agenda."
        ]:
            r = predictor.predict(text)
            print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")