File size: 3,933 Bytes
e75d802
 
 
 
 
3a92873
e75d802
3a92873
 
 
 
e75d802
3a92873
e75d802
 
 
3a92873
e75d802
ddb8efa
 
e0fcebd
ddb8efa
e75d802
 
 
 
 
 
 
 
3a92873
e75d802
 
8dd90db
 
 
 
 
 
 
 
 
 
 
 
 
3a92873
8dd90db
 
e75d802
8dd90db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5474bb
add7ad3
1b7cbf8
c5474bb
 
3a92873
 
e75d802
3a92873
 
e75d802
3a92873
e75d802
3a92873
 
 
 
 
 
e75d802
3a92873
 
e75d802
3a92873
 
e75d802
3a92873
 
e75d802
 
3a92873
e75d802
3a92873
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr  # used to build the web interface
import torch  # used to run the model and handle predictions
from transformers import BertTokenizer, BertForSequenceClassification  # to load our trained model and tokenizer
import zipfile  # for extracting the uploaded model
import os  # to check if folder already exists

# One-time setup: unpack the bundled model archive (if needed) and load it.
# NOTE(review): assumes "fine_tuned_model.zip" sits next to this script and
# extracts into a layout from_pretrained() understands — confirm on deploy.
# check if the fine-tuned model folder is already extracted
if not os.path.exists("fine_tuned_model"):
    with zipfile.ZipFile("fine_tuned_model.zip", 'r') as zip_ref:
        zip_ref.extractall("fine_tuned_model")

# load tokenizer and model
model_path = "./fine_tuned_model"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)
model.eval()  # set model to evaluation mode (important for inference)

# this function will be triggered when user submits a sentence
# this function will be triggered when user submits a sentence
def detect_bias(text):
    """Classify *text* as gender-biased or unbiased with the fine-tuned BERT model.

    Parameters
    ----------
    text : str
        Narrative text entered by the user.

    Returns
    -------
    dict
        {"Bias Classification": str, "Confidence Score": float (2 d.p.),
         "Explanation": str}
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=1).squeeze()
        pred_label = torch.argmax(probs).item()
        # Probability of the model's predicted class. For a binary softmax the
        # argmax probability is always >= 0.5, so the "low" band below is
        # unreachable in practice; kept for safety if num_labels ever changes.
        confidence = round(probs[pred_label].item(), 2)

        # HACK: the training labels appear inverted relative to the UI
        # semantics, so flip 0 <-> 1 here. After the flip, 1 = biased.
        # TODO(review): fix the label mapping at training time instead.
        pred_label = 1 - pred_label

    # Bucket the confidence once instead of repeating threshold checks.
    if confidence > 0.75:
        band = "high"
    elif confidence >= 0.5:  # equivalent to 0.5 <= confidence <= 0.75 here
        band = "mid"
    else:
        band = "low"

    # (label, band) -> (classification, explanation); strings unchanged.
    messages = {
        (1, "high"): (
            "Biased",
            "⚠️ This text is likely biased. The model is highly confident that it reflects gender stereotypes or role bias.",
        ),
        (1, "mid"): (
            "Possibly Biased",
            "πŸ€” This text might contain some gender bias, but the model is not entirely sure. Review it carefully.",
        ),
        (1, "low"): (
            "Uncertain",
            "😐 The model predicted 'biased' but with low confidence. The result may not be reliable.",
        ),
        (0, "high"): (
            "Unbiased",
            "βœ… This text appears neutral with no strong signs of gender bias based on the model's understanding.",
        ),
        (0, "mid"): (
            "Possibly Unbiased",
            "πŸ€” This text seems unbiased, but the model isn't highly confident. It may still be worth reviewing.",
        ),
        (0, "low"): (
            "Uncertain",
            "😐 The model predicted 'unbiased' but with low confidence. The result is unclear.",
        ),
    }
    # Fall back to the unbiased/low message rather than raising
    # UnboundLocalError if an unexpected label ever appears.
    final_label, explanation = messages.get((pred_label, band), messages[(0, "low")])

    return {
        "Bias Classification": final_label,
        "Confidence Score": confidence,
        "Explanation": explanation
    }

# build the Gradio UI: one text input, a submit button, and a JSON output
# panel wired to detect_bias().
with gr.Blocks() as demo:
    gr.Markdown("## Bias Bin – Fine-Tuned BERT Version by Aryan, Gowtham & Manoj")
    gr.Markdown("Detect gender bias in text using a BERT model fine-tuned with counterfactual data.")

    # input box for users (multi-line so longer narratives fit)
    text_input = gr.Textbox(
        label="Enter Narrative Text",
        lines=4,
        placeholder="E.g., 'The woman stayed at home while the man went to work.'"
    )

    # button to submit
    submit_btn = gr.Button("Detect Bias")

    # output area: detect_bias() returns a dict, rendered here as JSON
    output = gr.JSON(label="Prediction Output")

    # connect button to function
    submit_btn.click(fn=detect_bias, inputs=text_input, outputs=output)

    # disclaimer at the bottom
    gr.Markdown("⚠️ **Disclaimer:** This model is trained on a small, synthetic dataset and may not always be accurate. Results should be interpreted cautiously and reviewed by a human.")

# run the app (blocks until the server is stopped)
demo.launch()