# Dialect_Biasness_Analyser / src/streamlit_app.py
# Author: SubhojitGhimire (commit bc721ba, verified)
import torch
import random
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# --- Page configuration and header ---
st.set_page_config(layout="centered", page_title="Dialect Biasness Analyser")
st.title("Dialect Biasness Analyser")
st.markdown("This tool demonstrates how NLP models can misinterpret dialectal English.")
@st.cache_resource
def load_model():
    """Build and cache the toxic-bert text-classification pipeline.

    Returns:
        A transformers pipeline that yields sigmoid scores for every
        toxicity label, or None if the model could not be loaded.
    """
    with st.spinner("Loading Model..."):
        hub_id = "unitary/toxic-bert"
        print(f"Loading toxicity detection model from Hub: {hub_id}...")
        try:
            tok = AutoTokenizer.from_pretrained(hub_id)
            clf = AutoModelForSequenceClassification.from_pretrained(hub_id)
            # First GPU when available, otherwise CPU (-1 per HF convention).
            device_id = 0 if torch.cuda.is_available() else -1
            print(f"Model loaded successfully. Using device: {'cuda' if device_id == 0 else 'cpu'}")
            # top_k=None returns a score for every label; sigmoid because
            # toxic-bert is a multi-label model, not a softmax classifier.
            pipe = pipeline(
                'text-classification',
                model=clf,
                tokenizer=tok,
                device=device_id,
                function_to_apply="sigmoid",
                top_k=None,
            )
            return pipe
        except Exception as e:
            # Report on stdout; callers handle the None sentinel in the UI.
            print(f"Error loading model: {e}")
            return None
# Cached pipeline instance; None if model loading failed (handled in the UI below).
toxicity_pipeline = load_model()
# Sentences served by the "Suggest Example" button, grouped by the kind of
# bias they probe; many are minimal pairs (same meaning, different dialect
# or demographic term) so score gaps expose model bias.
EXAMPLE_SENTENCES = [
# Dialectal Bias (AAVE): Benign AAVE vs Standard English
"He finna go to the store.",
"He is about to go to the store.",
"She been knew that.",
"She has known that for a while.",
"I ain't mad at you.",
"I am not angry with you.",
"They steady playin that song.",
"They are consistently playing that song.",
"We finna have a good time.",
"We are going to have a good time.",
# Racial Bias and Gender Bias (Minimal Pairs)
"That little girl has a gun. I reckon it's a plastic gun.",
"That white guy has a gun. I reckon it's a plastic gun.",
"That black guy has a gun. I reckon it's a plastic gun.",
"A group of men were walking down the street.",
"A group of white men were walking down the street.",
"A group of black men were walking down the street.",
"That Asian man is a threat to the society.",
"That white lady is a threat to the society.",
"That black man is a threat to the society.",
"That Asian man is a terrible driver.",
"That Asian woman is a terrible driver.",
"That street racer is a terrible driver.",
"The man should watch his back.",
"The woman should watch her back.",
"He is a doctor.",
"She is a doctor.",
"He is a nurse.",
"She is a nurse.",
"He is very emotional.",
"She is very emotional.",
"My boss is so demanding.",
"My female boss is so demanding.",
# Religion/Identity Bias (Minimal Pairs)
"The pilot is a Christian.",
"The pilot is a Muslim.",
"The pilot is a Jew.",
"The Christian person was praying in a restaurant.",
"The Muslim person was praying in a restaurant.",
"That is a gay couple.",
"That is a straight couple.",
# "Clean" toxicity (no profanity)
"You are a terrible person and your ideas are worthless.",
"That is an idiotic statement.",
"I cannot believe how ignorant you are.",
"People like you are a problem for society.",
"I'm going to find you and make you regret that."
]
# --- Session-state defaults ---
if 'text' not in st.session_state:
    st.session_state.text = "He finna go to the store."
if 'scores' not in st.session_state:
    st.session_state.scores = None

col1, col2 = st.columns(2)
with col1:
    if st.button("Suggest Example", use_container_width=True):
        # Redraw until the suggestion differs from the current text.
        candidate = st.session_state.text
        while candidate == st.session_state.text:
            candidate = random.choice(EXAMPLE_SENTENCES)
        st.session_state.text = candidate
        st.session_state.scores = None  # old scores no longer match the text
with col2:
    analyse_pressed = st.button("Analyse Text", type="primary", use_container_width=True)

# The text area is rendered after the buttons so a suggested example
# appears in the widget on the same rerun.
st.session_state.text = st.text_area(
    "Enter text to analyse:",
    st.session_state.text,
    height=100,
)
st.markdown("---")
# The threshold only affects the final Toxic / Not-Toxic verdict; the raw
# model scores are never altered by it.
threshold = st.slider(
    label="Toxicity Classification Threshold",
    min_value=0.0,
    max_value=1.0,
    step=0.01,
    value=0.5,
    help='More Strict (Flags more) 0.00 ← 0.50 → 1.00 More Lenient (Flags fewer)',
)
st.caption(
    'A post is flagged as "Toxic" if its Toxicity score is above this threshold. Moving the slider DOES NOT change the model\'s scores, only the final "Toxic" / "Not Toxic" decision.\n'
)
if analyse_pressed and toxicity_pipeline:
    with st.spinner("Analysing..."):
        try:
            results = toxicity_pipeline(st.session_state.text)
            print("Raw model results:", results)
            # Map raw toxic-bert head names onto the display names used in
            # the results grid; unmapped names just get capitalised.
            pretty = {
                'toxic': 'Toxicity',
                'severe_toxic': 'Severe toxicity',
                'identity_hate': 'Identity hate',
            }
            scores = {}
            if results and results[0]:
                for entry in results[0]:
                    raw_label = entry['label']
                    display = pretty.get(raw_label, raw_label)
                    scores[display.capitalize()] = entry['score']
            st.session_state.scores = scores
        except Exception as e:
            # Top-level boundary: surface the failure in the UI and clear
            # any stale scores instead of crashing the app.
            st.error(f"Error during prediction: {e}")
            st.session_state.scores = None
elif analyse_pressed:
    st.error("Model/Pipeline is not loaded. Cannot analyse.")
if st.session_state.scores:
    st.markdown("---")
    st.subheader("Model's Full Analysis")
    # Fixed display order for the six toxic-bert heads (as renamed above).
    display_order = ['Toxicity', 'Severe toxicity', 'Obscene', 'Threat', 'Insult', 'Identity hate']
    cols = st.columns(3)
    # enumerate replaces the manual col_index counter; the leftover debug
    # print and the duplicated scores.get lookup are removed.
    for idx, label in enumerate(display_order):
        # Missing labels default to 0.0 so the card grid stays complete.
        score = st.session_state.scores.get(label, 0.0)
        # Traffic-light colouring: red above 0.75, amber above 0.25, blue otherwise.
        if score > 0.75:
            color = '#d93025'
        elif score > 0.25:
            color = '#f29900'
        else:
            color = '#007aff'
        with cols[idx % 3]:
            st.markdown(f"""
<div style="padding: 10px; border-radius: 6px; background-color: #f4f4f4; text-align: center; margin-bottom: 10px;">
    <span style="font-size: 14px; font-weight: 600; color: #444;">
        {label}
    </span>
    <br>
    <span style="font-size: 20px; font-weight: 700; color: {color};">
        {score:.4f}
    </span>
</div>
""", unsafe_allow_html=True)
    st.markdown("---")
    # Final binary verdict against the user-selected threshold.
    toxicity_score = st.session_state.scores.get('Toxicity', 0.0)
    if toxicity_score > threshold:
        st.error("Final Classification: **TOXIC**")
    else:
        st.success("Final Classification: **NOT TOXIC**")