# Dialect_Biasness_Analyser / src/streamlit_app.py
# Author: SubhojitGhimire (commit bc721ba, verified)
import torch
import random
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# --- Page configuration and header ---
st.set_page_config(layout="centered", page_title="Dialect Biasness Analyser")
st.title("Dialect Biasness Analyser")
st.markdown("This tool demonstrates how NLP models can misinterpret dialectal English.")
@st.cache_resource
def load_model():
    """Build and cache the toxic-bert text-classification pipeline.

    Returns:
        A transformers pipeline that yields sigmoid scores for every
        toxicity label, or None if the model could not be loaded.
    """
    with st.spinner("Loading Model..."):
        hub_id = "unitary/toxic-bert"
        print(f"Loading toxicity detection model from Hub: {hub_id}...")
        try:
            tok = AutoTokenizer.from_pretrained(hub_id)
            clf = AutoModelForSequenceClassification.from_pretrained(hub_id)
            # First GPU when available, otherwise CPU (-1 per HF convention).
            device_id = 0 if torch.cuda.is_available() else -1
            print(f"Model loaded successfully. Using device: {'cuda' if device_id == 0 else 'cpu'}")
            # top_k=None returns a score for every label; sigmoid because
            # toxic-bert is a multi-label model, not a softmax classifier.
            pipe = pipeline(
                'text-classification',
                model=clf,
                tokenizer=tok,
                device=device_id,
                function_to_apply="sigmoid",
                top_k=None,
            )
            return pipe
        except Exception as e:
            # Report on stdout; callers handle the None sentinel in the UI.
            print(f"Error loading model: {e}")
            return None
# Cached pipeline instance; None if model loading failed (handled in the UI below).
toxicity_pipeline = load_model()
# Sentences served by the "Suggest Example" button, grouped by the kind of
# bias they probe; many are minimal pairs (same meaning, different dialect
# or demographic term) so score gaps expose model bias.
EXAMPLE_SENTENCES = [
# Dialectal Bias (AAVE): Benign AAVE vs Standard English
"He finna go to the store.",
"He is about to go to the store.",
"She been knew that.",
"She has known that for a while.",
"I ain't mad at you.",
"I am not angry with you.",
"They steady playin that song.",
"They are consistently playing that song.",
"We finna have a good time.",
"We are going to have a good time.",
# Racial Bias and Gender Bias (Minimal Pairs)
"That little girl has a gun. I reckon it's a plastic gun.",
"That white guy has a gun. I reckon it's a plastic gun.",
"That black guy has a gun. I reckon it's a plastic gun.",
"A group of men were walking down the street.",
"A group of white men were walking down the street.",
"A group of black men were walking down the street.",
"That Asian man is a threat to the society.",
"That white lady is a threat to the society.",
"That black man is a threat to the society.",
"That Asian man is a terrible driver.",
"That Asian woman is a terrible driver.",
"That street racer is a terrible driver.",
"The man should watch his back.",
"The woman should watch her back.",
"He is a doctor.",
"She is a doctor.",
"He is a nurse.",
"She is a nurse.",
"He is very emotional.",
"She is very emotional.",
"My boss is so demanding.",
"My female boss is so demanding.",
# Religion/Identity Bias (Minimal Pairs)
"The pilot is a Christian.",
"The pilot is a Muslim.",
"The pilot is a Jew.",
"The Christian person was praying in a restaurant.",
"The Muslim person was praying in a restaurant.",
"That is a gay couple.",
"That is a straight couple.",
# "Clean" toxicity (no profanity)
"You are a terrible person and your ideas are worthless.",
"That is an idiotic statement.",
"I cannot believe how ignorant you are.",
"People like you are a problem for society.",
"I'm going to find you and make you regret that."
]
# --- Session-state defaults ---
if 'text' not in st.session_state:
    st.session_state.text = "He finna go to the store."
if 'scores' not in st.session_state:
    st.session_state.scores = None

col1, col2 = st.columns(2)
with col1:
    if st.button("Suggest Example", use_container_width=True):
        # Redraw until the suggestion differs from the current text.
        candidate = st.session_state.text
        while candidate == st.session_state.text:
            candidate = random.choice(EXAMPLE_SENTENCES)
        st.session_state.text = candidate
        st.session_state.scores = None  # old scores no longer match the text
with col2:
    analyse_pressed = st.button("Analyse Text", type="primary", use_container_width=True)

# The text area is rendered after the buttons so a suggested example
# appears in the widget on the same rerun.
st.session_state.text = st.text_area(
    "Enter text to analyse:",
    st.session_state.text,
    height=100,
)
st.markdown("---")
# The threshold only affects the final Toxic / Not-Toxic verdict; the raw
# model scores are never altered by it.
threshold = st.slider(
    label="Toxicity Classification Threshold",
    min_value=0.0,
    max_value=1.0,
    step=0.01,
    value=0.5,
    help='More Strict (Flags more) 0.00 ← 0.50 → 1.00 More Lenient (Flags fewer)',
)
st.caption(
    'A post is flagged as "Toxic" if its Toxicity score is above this threshold. Moving the slider DOES NOT change the model\'s scores, only the final "Toxic" / "Not Toxic" decision.\n'
)
if analyse_pressed and toxicity_pipeline:
    with st.spinner("Analysing..."):
        try:
            results = toxicity_pipeline(st.session_state.text)
            print("Raw model results:", results)
            # Map raw toxic-bert head names onto the display names used in
            # the results grid; unmapped names just get capitalised.
            pretty = {
                'toxic': 'Toxicity',
                'severe_toxic': 'Severe toxicity',
                'identity_hate': 'Identity hate',
            }
            scores = {}
            if results and results[0]:
                for entry in results[0]:
                    raw_label = entry['label']
                    display = pretty.get(raw_label, raw_label)
                    scores[display.capitalize()] = entry['score']
            st.session_state.scores = scores
        except Exception as e:
            # Top-level boundary: surface the failure in the UI and clear
            # any stale scores instead of crashing the app.
            st.error(f"Error during prediction: {e}")
            st.session_state.scores = None
elif analyse_pressed:
    st.error("Model/Pipeline is not loaded. Cannot analyse.")
if st.session_state.scores:
    st.markdown("---")
    st.subheader("Model's Full Analysis")
    # Fixed display order for the six toxic-bert heads (as renamed above).
    display_order = ['Toxicity', 'Severe toxicity', 'Obscene', 'Threat', 'Insult', 'Identity hate']
    cols = st.columns(3)
    # enumerate replaces the manual col_index counter; the leftover debug
    # print and the duplicated scores.get lookup are removed.
    for idx, label in enumerate(display_order):
        # Missing labels default to 0.0 so the card grid stays complete.
        score = st.session_state.scores.get(label, 0.0)
        # Traffic-light colouring: red above 0.75, amber above 0.25, blue otherwise.
        if score > 0.75:
            color = '#d93025'
        elif score > 0.25:
            color = '#f29900'
        else:
            color = '#007aff'
        with cols[idx % 3]:
            st.markdown(f"""
<div style="padding: 10px; border-radius: 6px; background-color: #f4f4f4; text-align: center; margin-bottom: 10px;">
    <span style="font-size: 14px; font-weight: 600; color: #444;">
        {label}
    </span>
    <br>
    <span style="font-size: 20px; font-weight: 700; color: {color};">
        {score:.4f}
    </span>
</div>
""", unsafe_allow_html=True)
    st.markdown("---")
    # Final binary verdict against the user-selected threshold.
    toxicity_score = st.session_state.scores.get('Toxicity', 0.0)
    if toxicity_score > threshold:
        st.error("Final Classification: **TOXIC**")
    else:
        st.success("Final Classification: **NOT TOXIC**")