| | import torch |
| | import random |
| | import streamlit as st |
| | from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
| |
|
# Page chrome. NOTE: st.set_page_config must be the first Streamlit command
# executed in the script, so it stays ahead of the title/description calls.
st.set_page_config(page_title="Dialect Biasness Analyser", layout="centered")
st.title("Dialect Biasness Analyser")
st.markdown("This tool demonstrates how NLP models can misinterpret dialectal English.")
|
@st.cache_resource
def load_model():
    """Load the unitary/toxic-bert classifier once per server process.

    Returns:
        A transformers text-classification pipeline configured for
        multi-label scoring (independent sigmoid per label, all labels
        returned), or ``None`` if loading fails. Callers must check for
        ``None`` before use.
    """
    model_name = "unitary/toxic-bert"
    with st.spinner("Loading Model..."):
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # transformers pipeline device convention: 0 = first CUDA GPU, -1 = CPU.
            device = 0 if torch.cuda.is_available() else -1
            return pipeline(
                'text-classification',
                model=model,
                tokenizer=tokenizer,
                # toxic-bert is multi-label: each label gets its own sigmoid
                # score rather than a softmax over labels.
                function_to_apply="sigmoid",
                # top_k=None returns scores for every label, not just the best.
                top_k=None,
                device=device,
            )
        except Exception as e:
            # BUG FIX: the failure was previously only print()-ed to the
            # server console, so the web UI gave no hint why analysis was
            # disabled. Surface it to the user instead.
            st.error(f"Error loading model: {e}")
            return None


toxicity_pipeline = load_model()
| |
|
| | |
# Curated probe sentences, arranged as contrastive pairs/triples so users can
# compare how the model scores near-identical meanings phrased differently.
EXAMPLE_SENTENCES = [
    # --- Dialect pairs: AAVE phrasing vs. a "standard" English paraphrase ---
    "He finna go to the store.",
    "He is about to go to the store.",
    "She been knew that.",
    "She has known that for a while.",
    "I ain't mad at you.",
    "I am not angry with you.",
    "They steady playin that song.",
    "They are consistently playing that song.",
    "We finna have a good time.",
    "We are going to have a good time.",
    # --- Identity-term contrasts: same sentence, different demographic ---
    "That little girl has a gun. I reckon it's a plastic gun.",
    "That white guy has a gun. I reckon it's a plastic gun.",
    "That black guy has a gun. I reckon it's a plastic gun.",
    "A group of men were walking down the street.",
    "A group of white men were walking down the street.",
    "A group of black men were walking down the street.",
    "That Asian man is a threat to the society.",
    "That white lady is a threat to the society.",
    "That black man is a threat to the society.",
    "That Asian man is a terrible driver.",
    "That Asian woman is a terrible driver.",
    "That street racer is a terrible driver.",
    "The man should watch his back.",
    "The woman should watch her back.",
    # --- Gender/profession contrasts ---
    "He is a doctor.",
    "She is a doctor.",
    "He is a nurse.",
    "She is a nurse.",
    "He is very emotional.",
    "She is very emotional.",
    "My boss is so demanding.",
    "My female boss is so demanding.",
    # --- Religion and orientation contrasts ---
    "The pilot is a Christian.",
    "The pilot is a Muslim.",
    "The pilot is a Jew.",
    "The Christian person was praying in a restaurant.",
    "The Muslim person was praying in a restaurant.",
    "That is a gay couple.",
    "That is a straight couple.",
    # --- Genuinely toxic / threatening controls ---
    "You are a terrible person and your ideas are worthless.",
    "That is an idiotic statement.",
    "I cannot believe how ignorant you are.",
    "People like you are a problem for society.",
    "I'm going to find you and make you regret that."
]

# Session-state defaults: the text under analysis and the latest score dict
# (None until an analysis has been run, which also hides the results panel).
if 'text' not in st.session_state:
    st.session_state.text = "He finna go to the store."
if 'scores' not in st.session_state:
    st.session_state.scores = None
| |
|
# Action row: example picker on the left, analyse trigger on the right.
col1, col2 = st.columns(2)
with col1:
    if st.button("Suggest Example", use_container_width=True):
        # Rejection-sample until the draw differs from the sentence
        # currently shown, so the button always visibly changes the text.
        candidate = st.session_state.text
        while candidate == st.session_state.text:
            candidate = random.choice(EXAMPLE_SENTENCES)
        st.session_state.text = candidate
        # Fresh text invalidates any previously displayed scores.
        st.session_state.scores = None
with col2:
    analyse_pressed = st.button(
        "Analyse Text",
        type="primary",
        use_container_width=True,
    )

# The text area both displays the current sentence and captures user edits.
st.session_state.text = st.text_area(
    "Enter text to analyse:",
    st.session_state.text,
    height=100,
)
| |
|
st.markdown("---")
# Decision threshold for the final verdict only: the model's per-label
# scores are computed independently of this value.
threshold = st.slider(
    label="Toxicity Classification Threshold",
    min_value=0.0,
    max_value=1.0,
    value=0.5,
    step=0.01,
    help='More Strict (Flags more) 0.00 ← 0.50 → 1.00 More Lenient (Flags fewer)'
)
st.caption(
    'A post is flagged as "Toxic" if its Toxicity score is above this threshold. Moving the slider DOES NOT change the model\'s scores, only the final "Toxic" / "Not Toxic" decision.\n'
)
| |
|
if analyse_pressed and toxicity_pipeline:
    with st.spinner("Analysing..."):
        try:
            # top_k=None pipelines return a list (per input) of
            # {'label': ..., 'score': ...} dicts for every label.
            results = toxicity_pipeline(st.session_state.text)
            scores = {}
            if results and results[0]:
                # Explicit display names for labels whose raw form does not
                # capitalise cleanly; anything else just gets capitalised
                # (e.g. 'obscene' -> 'Obscene'). Replaces the former
                # if/elif rename chain and removes a leftover debug print.
                label_display = {
                    'toxic': 'Toxicity',
                    'severe_toxic': 'Severe toxicity',
                    'identity_hate': 'Identity hate',
                }
                for label_score in results[0]:
                    raw = label_score['label']
                    name = label_display.get(raw, raw.capitalize())
                    scores[name] = label_score['score']
            st.session_state.scores = scores
        except Exception as e:
            # Show the failure in the UI and clear any stale results.
            st.error(f"Error during prediction: {e}")
            st.session_state.scores = None
elif analyse_pressed:
    # Analyse was pressed but load_model() returned None earlier.
    st.error("Model/Pipeline is not loaded. Cannot analyse.")
| |
|
if st.session_state.scores:
    st.markdown("---")
    st.subheader("Model's Full Analysis")

    # Fixed display order; labels missing from the score dict render as 0.0.
    display_order = ['Toxicity', 'Severe toxicity', 'Obscene', 'Threat', 'Insult', 'Identity hate']
    cols = st.columns(3)
    # enumerate + modulo replaces the former hand-rolled col_index counter,
    # and a leftover debug print (which duplicated the .get lookup) is gone.
    for i, label in enumerate(display_order):
        score = st.session_state.scores.get(label, 0.0)
        # Traffic-light colouring: red above 0.75, amber above 0.25, blue low.
        if score > 0.75:
            color = '#d93025'
        elif score > 0.25:
            color = '#f29900'
        else:
            color = '#007aff'
        with cols[i % 3]:
            st.markdown(f"""
            <div style="padding: 10px; border-radius: 6px; background-color: #f4f4f4; text-align: center; margin-bottom: 10px;">
                <span style="font-size: 14px; font-weight: 600; color: #444;">
                    {label}
                </span>
                <br>
                <span style="font-size: 20px; font-weight: 700; color: {color};">
                    {score:.4f}
                </span>
            </div>
            """, unsafe_allow_html=True)

    st.markdown("---")
    toxicity_score = st.session_state.scores.get('Toxicity', 0.0)
    # Strictly-greater comparison: a score exactly at the threshold is
    # classified as not toxic. (Also dropped the spurious f-prefixes on
    # these placeholder-free strings.)
    if toxicity_score > threshold:
        st.error("Final Classification: **TOXIC**")
    else:
        st.success("Final Classification: **NOT TOXIC**")