|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
import torch.nn.functional as F |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import plotly.graph_objects as go |
|
|
import numpy as np |
|
|
import os |
|
|
|
|
|
class HateSpeechDetector:
    """Hate-speech classifier built on a transformers sequence-classification
    model, with attention-based word highlighting and a Plotly confidence chart.

    Attributes:
        device: torch device the model and all inputs live on (CUDA if available).
        tokenizer: transformers tokenizer paired with the loaded model.
        model: AutoModelForSequenceClassification in eval mode.
        categories: the 8 output labels; index -1 ("Appropriate") is the
            non-hate class.
    """

    def __init__(self, model_path: str = "sadjava/multilingual-hate-speech-xlm-roberta"):
        """Initialize the hate speech detector with a trained model."""
        # Prefer GPU when available; inputs are moved to this device per call.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🔧 Using device: {self.device}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
            self.model.to(self.device)
            self.model.eval()
            print(f"✅ Model loaded successfully from {model_path}")
        except Exception as e:
            # Any load failure (missing checkpoint, network error, ...) falls
            # back to a generic toxicity model so the app still starts.
            print(f"❌ Error loading model: {e}")

            # NOTE(review): the fallback pairs an xlm-roberta-base tokenizer
            # with the unitary/toxic-bert model — these come from different
            # checkpoints; verify their vocabularies are actually compatible.
            print("🔄 Falling back to default multilingual model...")
            self.tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
            self.model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
            self.model.to(self.device)
            self.model.eval()

        # Label order must match the fine-tuned model's classification head;
        # predict_with_context also uses len(self.categories) to detect
        # whether the loaded model is the 8-way head or a binary fallback.
        self.categories = [
            "Race", "Sexual Orientation", "Gender", "Physical Appearance",
            "Religion", "Class", "Disability", "Appropriate"
        ]

    def predict_with_context(self, text: str) -> tuple:
        """Predict hate speech category with contextual analysis.

        Args:
            text: raw input text (English/Serbian per the UI).

        Returns:
            tuple: (category label str, confidence float, Plotly figure,
            HTML string with word-importance highlighting). On empty input
            or internal error the figure slot holds an empty dict and the
            label is a message string rather than a category.
        """
        if not text.strip():
            return "Please enter some text", 0.0, {}, ""

        try:
            # Tokenize with truncation at the model's 512-token limit.
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512,
                return_attention_mask=True
            )

            # Move every input tensor to the model's device.
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Forward pass with attentions enabled so word importance can be
            # derived afterwards in create_word_highlighting().
            with torch.no_grad():
                outputs = self.model(**inputs, output_attentions=True)
                logits = outputs.logits
                attentions = outputs.attentions

            probabilities = F.softmax(logits, dim=-1)

            if probabilities.shape[-1] == len(self.categories):
                # 8-way head: class indices map 1:1 onto self.categories.
                predicted_class = torch.argmax(probabilities, dim=-1).item()
                predicted_category = self.categories[predicted_class]
            else:
                # Binary head (e.g. the toxic-bert fallback): class 1 is
                # treated as the "inappropriate" class.
                predicted_class = torch.argmax(probabilities, dim=-1).item()
                predicted_category = "Inappropriate" if predicted_class == 1 else "Appropriate"

                # Synthesize an 8-class distribution for the chart: the
                # "inappropriate" mass is spread evenly across the 7 hate
                # categories and the remainder goes to "Appropriate".
                prob_inappropriate = float(probabilities[0][1]) if probabilities.shape[-1] > 1 else 0.5
                fake_probs = torch.zeros(len(self.categories))
                fake_probs[-1] = 1 - prob_inappropriate
                fake_probs[0] = prob_inappropriate / 7
                for i in range(1, 7):
                    fake_probs[i] = prob_inappropriate / 7
                probabilities = fake_probs.unsqueeze(0)

            # Confidence is the top class's (possibly synthesized) probability.
            confidence = float(torch.max(probabilities[0]))

            # Bar chart of per-category scores.
            confidence_chart = self.create_confidence_chart(probabilities[0])

            # Attention-derived per-word highlighting of the input text.
            highlighted_html = self.create_word_highlighting(text, inputs, attentions)

            return predicted_category, confidence, confidence_chart, highlighted_html

        except Exception as e:
            print(f"Error in prediction: {e}")
            return f"Error: {str(e)}", 0.0, {}, ""

    def create_confidence_chart(self, probabilities):
        """Create confidence visualization.

        Args:
            probabilities: 1-D sequence of per-category scores aligned with
                self.categories.

        Returns:
            plotly.graph_objects.Figure: bar chart with one bar per category.
        """
        scores = [float(prob) for prob in probabilities]
        # Green bar for the non-hate class, red for all hate categories.
        colors = ['#ff6b6b' if cat != 'Appropriate' else '#51cf66' for cat in self.categories]

        fig = go.Figure(data=[
            go.Bar(
                x=self.categories,
                y=scores,
                marker_color=colors,
                text=[f'{score:.1%}' for score in scores],
                textposition='auto',
            )
        ])

        fig.update_layout(
            title="Confidence Scores by Category",
            xaxis_title="Categories",
            yaxis_title="Confidence",
            yaxis_range=[0, 1],
            height=400,
            xaxis_tickangle=-45
        )

        return fig

    def create_word_highlighting(self, text, inputs, attentions):
        """Create word-level importance highlighting.

        Wraps each whitespace-separated word of *text* in an HTML <span>
        whose background color reflects how much attention the model's last
        layer paid to that word's tokens. Any failure is reported inline as
        an HTML error message rather than raised.
        """
        try:
            # Last layer's attention for the single batch item, averaged
            # over heads; presumably shaped (heads, seq, seq) -> (seq, seq).
            last_layer_attention = attentions[-1][0]
            avg_attention = torch.mean(last_layer_attention, dim=0)

            # Importance of a token = total attention it RECEIVES, summed
            # over all query positions.
            token_importance = torch.sum(avg_attention, dim=0).cpu().numpy()
            # NOTE(review): input_ids may be a CUDA tensor here — confirm
            # convert_ids_to_tokens accepts it without .tolist().
            tokens = self.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

            # Strip the special tokens at both ends (e.g. <s> ... </s>).
            # content_tokens is computed but not used below.
            content_tokens = tokens[1:-1] if len(tokens) > 2 else tokens
            content_importance = token_importance[1:-1] if len(token_importance) > 2 else token_importance

            # Min-max normalize to [0, 1]; the square root flattens the
            # curve so mid-importance words still get visible color.
            if len(content_importance) > 1:
                importance_norm = (content_importance - content_importance.min()) / (content_importance.max() - content_importance.min() + 1e-8)
                importance_norm = np.power(importance_norm, 0.5)
            else:
                importance_norm = np.array([0.5])

            # Map token scores back onto whitespace-separated words by
            # re-tokenizing each word and averaging its tokens' scores.
            # NOTE(review): this assumes tokenizing words one at a time
            # yields the same subword split as tokenizing the full text —
            # verify for SentencePiece tokenizers.
            words = text.split()
            word_scores = []

            token_idx = 0
            for word in words:
                word_importance_scores = []
                word_tokens = self.tokenizer.tokenize(word)

                for _ in word_tokens:
                    if token_idx < len(importance_norm):
                        word_importance_scores.append(importance_norm[token_idx])
                        token_idx += 1

                if word_importance_scores:
                    word_score = np.mean(word_importance_scores)
                else:
                    # No score available (e.g. ran past the truncated
                    # sequence): fall back to a low default.
                    word_score = 0.2

                word_scores.append(word_score)

            # Bucket each word's score into one of four background colors
            # (red = high, orange = medium, yellow = low, gray = minimal).
            html_parts = []
            for word, score in zip(words, word_scores):
                if score > 0.7:
                    color = "rgba(220, 53, 69, 0.8)"
                elif score > 0.5:
                    color = "rgba(255, 193, 7, 0.8)"
                elif score > 0.3:
                    color = "rgba(255, 235, 59, 0.6)"
                else:
                    color = "rgba(248, 249, 250, 0.3)"

                html_parts.append(
                    f'<span style="background-color: {color}; padding: 3px 6px; margin: 2px; '
                    f'border-radius: 4px; font-weight: 500; border: 1px solid rgba(0,0,0,0.1);" '
                    f'title="Importance: {score:.3f}">{word}</span>'
                )

            return '<div style="line-height: 2.5; font-size: 16px; padding: 10px;">' + ' '.join(html_parts) + '</div>'

        except Exception as e:
            return f'<div>Error in highlighting: {str(e)}</div>'
|
|
|
|
|
|
|
|
# Single shared detector instance used by all Gradio callbacks.
# Note: this loads (and may download) the model at import time.
detector = HateSpeechDetector()
|
|
|
|
|
def analyze_text(text: str):
    """Run the detector on *text* and format the three Gradio outputs.

    Args:
        text: raw user input from the textbox.

    Returns:
        tuple: (markdown result string, Plotly confidence figure or None,
        HTML string with word-importance highlighting).
    """
    # Guard empty input up front. Previously the sentinel string returned by
    # predict_with_context ("Please enter some text") fell into the else
    # branch below and was mislabeled as detected hate speech.
    if not text.strip():
        return "Please enter some text", None, ""

    try:
        category, confidence, chart, highlighted = detector.predict_with_context(text)

        if category == "Appropriate":
            result = f"✅ **No hate speech detected**\n\nCategory: {category}\nConfidence: {confidence:.1%}"
        else:
            result = f"⚠️ **Hate speech detected**\n\nCategory: {category}\nConfidence: {confidence:.1%}"

        return result, chart, highlighted

    except Exception as e:
        # Return None (not {}) for the gr.Plot slot: None clears the chart,
        # whereas an empty dict is not a valid figure value.
        return f"❌ Error: {str(e)}", None, ""
|
|
|
|
|
def provide_feedback(text: str, rating: int):
    """Acknowledge a user's star rating for the last analysis.

    Args:
        text: the text that was analyzed; used only to check that an
            analysis actually happened.
        rating: star rating from 1 to 5.

    Returns:
        str: a status message for the feedback textbox.
    """
    # Only accept feedback once there is something to rate.
    if text.strip():
        return f"✅ Thanks for rating {rating}/5 stars! Feedback helps improve the model."
    return "Please analyze some text first!"
|
|
|
|
|
|
|
|
# --- Gradio UI definition (built at import time, launched in __main__) ---
with gr.Blocks(title="Multilingual Hate Speech Detector", theme=gr.themes.Soft()) as demo:
    # Header: title and feature overview.
    gr.Markdown("""
    # 🛡️ Multilingual Hate Speech Detector

    **Advanced AI system for detecting hate speech in English and Serbian text**

    🔬 **Key Innovations:**
    - **Contextual Analysis**: See which words influenced the AI's decision
    - **Confidence Visualization**: Interactive charts showing prediction confidence across all categories
    - **Word-Level Highlighting**: Visual explanation of model attention and focus
    - **Multilingual Support**: Trained on English and Serbian hate speech datasets
    - **Real-time Processing**: Instant classification with detailed explanations

    📋 **Categories detected:** Race, Sexual Orientation, Gender, Physical Appearance, Religion, Class, Disability, or Appropriate (no hate speech)
    """)

    with gr.Row():
        # Left column: text input, analyze button, clickable examples.
        with gr.Column():
            text_input = gr.Textbox(
                label="🔍 Enter text to analyze (English/Serbian)",
                placeholder="Type or paste text here for hate speech analysis...",
                lines=4,
                max_lines=10
            )

            analyze_btn = gr.Button("🚀 Analyze Text", variant="primary", size="lg")

            gr.Markdown("### 📝 Example Texts")
            # Mixed English/Serbian examples, both benign and hateful.
            gr.Examples(
                examples=[
                    ["I really enjoyed that movie last night! Great acting and storyline."],
                    ["You people are all the same, always causing problems everywhere you go."],
                    ["Women just can't drive as well as men, it's basic biology."],
                    ["That's so gay, this is stupid and makes no sense at all."],
                    ["Ovaj film je bio odličan, preporučujem svima da ga pogledaju!"],
                    ["Ti ljudi ne zaslužuju da žive ovde u našoj zemlji."],
                    ["Hello world! This is a test message for the AI system."],
                    ["People with disabilities contribute so much to our society."]
                ],
                inputs=text_input,
                label="Click any example to test the system"
            )

        # Right column: classification result and explanation of the pipeline.
        with gr.Column():
            result_output = gr.Markdown(label="🎯 Classification Result")

            gr.Markdown("### ℹ️ How it works")
            gr.Markdown("""
            1. **Input Processing**: Text is tokenized and processed by XLM-RoBERTa
            2. **Classification**: AI predicts hate speech category with confidence scores
            3. **Attention Analysis**: Model attention weights show word importance
            4. **Visual Explanation**: Color highlighting reveals decision factors
            """)

    # Full-width: per-category confidence bar chart (filled by analyze_text).
    gr.Markdown("### 📊 **Innovation 1**: Confidence Visualization")
    gr.Markdown("*Interactive chart showing model confidence across all hate speech categories*")
    confidence_plot = gr.Plot(label="Confidence Distribution")

    # Full-width: attention-based word highlighting (HTML from analyze_text).
    gr.Markdown("### 🌈 **Innovation 2**: Contextual Word Analysis")
    gr.Markdown("*Words are highlighted based on their influence on the classification decision*")
    gr.Markdown("🔴 **Red**: High influence | 🟠 **Orange**: Medium influence | 🟡 **Yellow**: Low influence | ⚪ **Gray**: Minimal influence")
    highlighted_text = gr.HTML(label="Word Importance Analysis")

    # Collapsible feedback widget; ratings are acknowledged, not persisted.
    with gr.Accordion("💬 **Innovation 3**: Interactive Feedback System", open=False):
        gr.Markdown("**Help improve the AI model by providing your feedback!**")
        with gr.Row():
            feedback_rating = gr.Slider(1, 5, step=1, value=3, label="Rate analysis quality (1-5 stars)")
            feedback_btn = gr.Button("📝 Submit Feedback")
        feedback_output = gr.Textbox(label="Feedback Status", interactive=False)

    # Collapsible technical notes.
    with gr.Accordion("🔧 Technical Details", open=False):
        gr.Markdown("""
        **Model Architecture**: XLM-RoBERTa (Cross-lingual Language Model)
        **Training Data**: Multilingual hate speech datasets (English + Serbian)
        **Categories**: 8 classes including 7 hate speech types + appropriate content
        **Attention Mechanism**: Transformer attention weights for explainability
        **Deployment**: Hugging Face Spaces with GPU acceleration
        """)

    # Wire the analyze button to the three output components.
    analyze_btn.click(
        fn=analyze_text,
        inputs=[text_input, feedback_rating][:1],
        outputs=[result_output, confidence_plot, highlighted_text]
    )

    # Wire the feedback button to the status textbox.
    feedback_btn.click(
        fn=provide_feedback,
        inputs=[text_input, feedback_rating],
        outputs=[feedback_output]
    )

    # Footer / disclaimer.
    gr.Markdown("""
    ---
    **⚡ Powered by**: Transformer Neural Networks | **🌍 Languages**: English, Serbian | **🎯 Accuracy**: High-confidence predictions

    *This AI system is designed for research and educational purposes. Results should be interpreted carefully and human judgment should always be applied for critical decisions.*
    """)
|
|
|
|
|
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()