File size: 3,820 Bytes
208266a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import torch 
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch.nn.functional as F
from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN

# Route Hugging Face Hub traffic through a custom endpoint (e.g. a mirror or
# proxy) when one is configured; must be set before any model download starts.
if HF_ENDPOINT:
    os.environ["HF_ENDPOINT"] = HF_ENDPOINT

class BiasPredictor:
    """Binary bias classifier built on a fine-tuned RoBERTa model.

    Loads the tokenizer and model once at construction time and exposes
    single-text (`predict`) and batched (`predict_batch`) inference, both
    returning plain-dict results with the same schema.
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        # NOTE(review): base_model_name is currently unused; retained so the
        # existing constructor signature stays backward-compatible.
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()  # inference mode: disables dropout etc.

        # Debug sanity check that the classification head weights were actually
        # loaded (an uninitialised head would show random-looking means).
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        # Maps model class ids to human-readable labels.
        self.label_map = {
            0: "Not Biased",
            1: "Biased",
        }

    def predict(self, text: str) -> dict:
        """Classify a single text.

        Returns a dict with keys: text, class_id, label, confidence,
        probabilities — the same schema as one `predict_batch` item.
        """
        # Delegate to the batched path so the two code paths cannot drift
        # apart (the previous copy duplicated the whole pipeline and used a
        # whole-tensor argmax that was only correct for batch size 1).
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify a batch of texts; returns one result dict per input text.

        Inputs are truncated to 128 tokens and padded to the longest item in
        the batch.
        """
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True,
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**inputs).logits
            probs = F.softmax(logits, dim=-1)

        results = []
        for i, text in enumerate(texts):
            # Per-row argmax: safe for any batch size.
            predicted_class_id = probs[i].argmax().item()
            results.append({
                "text": text,
                "class_id": predicted_class_id,
                "label": self.label_map.get(predicted_class_id, "Unknown"),
                "confidence": probs[i][predicted_class_id].item(),
                "probabilities": probs[i].tolist(),
            })

        return results
        
if __name__ == "__main__":
    # Smoke test: classify a few hand-picked examples, first as one batch and
    # then one text at a time through the single-item path.
    predictor = BiasPredictor()

    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda.",
    ]

    print("\n--- BATCH TEST ---")
    for result in predictor.predict_batch(texts):
        print(f"[{result['label']}] ({result['confidence']:.4f}) {result['text'][:60]}")

    print("\n ------- Single pass test for each text seprately ----------")
    for sample in texts:
        result = predictor.predict(sample)
        print(f"[{result['label']}] ({result['confidence']:.4f}) {result['text'][:60]}")