import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("BenjaminOcampo/peace_hatebert")
model = AutoModelForSequenceClassification.from_pretrained("BenjaminOcampo/peace_hatebert")
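# Note: the nuanced_labels mapping below assumes this checkpoint was fine-tuned
# with four output classes in exactly this index order. If the checkpoint ships
# its own mapping, model.config.id2label is the authoritative source and could
# be used instead of a hand-written dict.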
# Define more nuanced labels for the model output
nuanced_labels = {
    0: "Non-Hate Speech",
    1: "Explicit Hate",
    2: "Implicit Hate",
    3: "White Grievance"
}
# Microaggression detection rules: phrase -> explanation of why it can be harmful
microaggressions = {
    "You're so articulate": "This phrase can imply surprise that the individual can speak well, often used in a way that suggests it is unexpected for someone of their background.",
    "Where are you really from": "This question implies that the individual does not belong or is not truly part of the community.",
    "I don't see color": "This statement can negate the experiences and identities of people of different races.",
    "You're a credit to your race": "This phrase implies that most people of the individual’s race are not successful or commendable."
}
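# The matching in analyze_text below is a plain case-insensitive substring
# check, so it misses punctuation variants like "Where are you *really* from?".
# A minimal sketch of a more tolerant matcher (hypothetical, not wired in):
# precompile one case-insensitive pattern per phrase, allowing any non-word
# characters between words.
#
# import re
# microaggression_patterns = {
#     phrase: re.compile(r"\W+".join(map(re.escape, phrase.split())), re.IGNORECASE)
#     for phrase in microaggressions
# }
# matched = [p for p, rx in microaggression_patterns.items() if rx.search(text)]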
# A sample set of explanations and suggestions, keyed by classification label
bias_suggestions = {
    "Explicit Hate": {
        "suggestion": "Consider using language that promotes inclusivity and respect.",
        "explanation": "The text contains explicit hate speech, which is overtly harmful and discriminatory. It is important to foster communication that is inclusive and respectful of all individuals."
    },
    "Implicit Hate": {
        "suggestion": "Try rephrasing to avoid subtle bias and ensure clarity.",
        "explanation": "The text contains implicit hate speech, which can perpetuate stereotypes and bias in a less overt manner. Aim for language that is clear and free from insinuations."
    },
    "White Grievance": {
        "suggestion": "Reconsider any generalized claims about racial groups.",
        "explanation": "The text appears to express grievances linked to racial identity, which can contribute to divisive narratives. Strive for dialogue that acknowledges diversity and avoids stereotyping."
    },
    "Non-Hate Speech": {
        "suggestion": "No problematic content detected.",
        "explanation": "The text does not appear to contain hate speech or bias. It seems respectful and neutral."
    },
    "Microaggression": {
        "suggestion": "Be mindful of how certain phrases can be interpreted by others.",
        "explanation": "The text includes phrases that may be considered microaggressions, which can subtly perpetuate stereotypes or biases."
    }
}
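# Every label analyze_text can produce ("Non-Hate Speech", "Explicit Hate",
# "Implicit Hate", "White Grievance", "Microaggression") must have an entry
# above; a model class missing from nuanced_labels maps to "Unknown", which
# analyze_text handles with a fallback rather than raising a KeyError.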
def analyze_text(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Run the model without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities and take the most likely class
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()

    # Map the predicted class to the nuanced label
    label = nuanced_labels.get(predicted_class, "Unknown")

    # Rule-based check for microaggressions (case-insensitive substring match);
    # a match overrides the model's label and appends the phrase-specific note
    for phrase, phrase_explanation in microaggressions.items():
        if phrase.lower() in text.lower():
            label = "Microaggression"
            suggestion = bias_suggestions[label]["suggestion"]
            explanation = f"{bias_suggestions[label]['explanation']} Detected phrase: \"{phrase}\". {phrase_explanation}"
            return label, suggestion, explanation

    # Fetch the suggestion and explanation for the predicted label, falling
    # back gracefully if the model predicts a class outside nuanced_labels
    info = bias_suggestions.get(label, {
        "suggestion": "No suggestion available.",
        "explanation": "The model returned a class outside the known label set."
    })
    return label, info["suggestion"], info["explanation"]
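# Example (illustrative; actual model output depends on the checkpoint):
#   analyze_text("I don't see color")
# matches the microaggression rule regardless of the model's prediction and
# returns ("Microaggression", <suggestion>, <combined explanation>).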
# Create the Gradio interface
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter text to analyze..."),
    outputs=[
        gr.Textbox(label="Classification"),
        gr.Textbox(label="Suggestion"),
        gr.Textbox(label="Explanation")
    ],
    title="Proofreading for Implicit Bias and Microaggressions - initial model test",
    description="Analyze text for nuanced bias categories such as implicit hate, explicit hate, or white grievance, and detect microaggressions to provide suggestions for improvement - step 1: prompt testing. Credit to https://huggingface.co/BenjaminOcampo"
)

# Launch the interface
interface.launch()
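# On Hugging Face Spaces the app is served automatically when this file runs;
# locally, interface.launch(share=True) would also create a temporary public link.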