lotachi committed on
Commit
7d13847
·
verified ·
1 Parent(s): 571f01e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import torch
4
+ import numpy as np
5
+ import matplotlib
6
+ matplotlib.use("Agg")
7
+ import matplotlib.pyplot as plt
8
+ import matplotlib.colors as mcolors
9
+ import shap
10
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
+
12
# Load the fine-tuned model from the Hub; fall back to the base HateBERT
# checkpoint when the fine-tuned repo is unavailable (e.g. before it is pushed).
MODEL_ID = "lotachi/hatebert-toxic-classifier"  # we push this in Cell 10
FALLBACK = "GroNLP/hateBERT"

device = torch.device("cpu")  # HF Spaces free tier is CPU

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).to(device)
    print(f"Loaded fine-tuned model: {MODEL_ID}")
except Exception:  # was a bare `except:` — keep KeyboardInterrupt/SystemExit propagating
    print("Fine-tuned model not found, loading base HateBERT")
    tokenizer = AutoTokenizer.from_pretrained(FALLBACK)
    model = AutoModelForSequenceClassification.from_pretrained(FALLBACK, num_labels=2).to(device)

model.eval()  # inference only: disable dropout etc.
CLASS_NAMES = ["Non-Toxic", "Toxic"]  # index 1 is the positive/toxic class
30
def predict_single(text):
    """Classify one string.

    Returns a tuple of (predicted label, {class name: probability}, P(toxic)).
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=128, padding=True)
    with torch.no_grad():
        scores = model(**encoded).logits
    probabilities = torch.softmax(scores, dim=1)[0].cpu().numpy()
    top = probabilities.argmax()
    prob_map = dict(zip(CLASS_NAMES, (float(p) for p in probabilities)))
    return CLASS_NAMES[top], prob_map, float(probabilities[1])
38
def predict_batch(texts):
    """Return an (N, 2) array of class probabilities for a sequence of texts.

    Processed in small batches to bound peak memory on the CPU-only host.
    """
    batch_size = 8
    prob_chunks = []
    for start in range(0, len(texts), batch_size):
        chunk = list(texts[start:start + batch_size])
        encoded = tokenizer(chunk, padding=True, truncation=True, max_length=128, return_tensors="pt")
        with torch.no_grad():
            scores = model(**encoded).logits
        prob_chunks.append(torch.softmax(scores, dim=1).cpu().numpy())
    return np.vstack(prob_chunks)
48
# SHAP text explainer: masks tokens and calls predict_batch to attribute each
# token's contribution to the class probabilities (output_names labels columns).
masker = shap.maskers.Text(tokenizer)
explainer = shap.Explainer(predict_batch, masker, output_names=CLASS_NAMES)
51
def classify_text(text):
    """Gradio handler: classify `text` and build a SHAP token-importance plot.

    Returns (display string, {class: probability} dict, matplotlib figure or
    None when the input is empty or the SHAP explanation fails).
    """
    if not text or not text.strip():
        return "Please enter some text.", {}, None
    text = text.strip()[:800]  # cap length to keep CPU inference + SHAP fast
    label, prob_dict, toxic_prob = predict_single(text)
    # Confidence banner: red at >= 0.8 toxic, amber at >= 0.5, green otherwise.
    if toxic_prob >= 0.8:
        display = f"🚨 {label} ({toxic_prob:.0%} confidence)"
    elif toxic_prob >= 0.5:
        display = f"⚠️ {label} ({toxic_prob:.0%} confidence)"
    else:
        display = f"✅ {label} ({1-toxic_prob:.0%} confidence)"
    fig = None
    try:
        sv = explainer([text])
        # NOTE(review): tokenizer.tokenize() and the SHAP masker may segment the
        # text differently; values are aligned by position, which is best-effort.
        tokens = tokenizer.tokenize(text)[:25]
        vals = sv[0].values[:len(tokens), 1]  # column 1 = Toxic class
        vmax = max(abs(vals).max(), 0.01)  # floor avoids a degenerate color norm
        norm = mcolors.TwoSlopeNorm(vmin=-vmax, vcenter=0, vmax=vmax)
        cmap = plt.cm.RdYlGn_r
        fig, ax = plt.subplots(figsize=(10, max(3, len(tokens)*0.35)))
        ax.barh(range(len(tokens)), vals, color=cmap(norm(vals)), edgecolor="white", height=0.7)
        ax.set_yticks(range(len(tokens)))
        ax.set_yticklabels(tokens, fontsize=10)
        ax.axvline(0, color="black", linewidth=0.8)
        ax.set_xlabel("SHAP Value", fontsize=10)
        ax.set_title("Token SHAP Importance (Toxic class)", fontsize=12, fontweight="bold")
        ax.invert_yaxis()
        ax.spines[["top","right"]].set_visible(False)
        plt.tight_layout()
    except Exception:  # was a bare `except:`; SHAP failure degrades to no plot
        if fig is not None:
            plt.close(fig)  # don't leak a half-built figure
        fig = None
    return display, prob_dict, fig
83
# Example inputs for the gr.Examples widget (one-element rows: a single textbox).
EXAMPLES = [
    ["I really enjoyed the community event today!"],
    ["Thanks for your help, it made a big difference."],
    ["The policy has been criticised by many stakeholders."],
]
89
# Gradio UI. Statement order inside the Blocks context defines the layout, so
# the construction sequence below is load-bearing.
with gr.Blocks(title="Hate Speech Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# 🛡️ Hate Speech & Toxic Comment Detector
**MSc Data Science | CMP-L016 Deep Learning Applications**

Classifies text using fine-tuned **HateBERT** with **SHAP** word-level explanations.
""")
    with gr.Row():
        with gr.Column():
            # Left column: input textbox, trigger button, canned examples.
            text_input = gr.Textbox(lines=5, placeholder="Enter text...", label="Input Text")
            submit_btn = gr.Button("🔍 Classify", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=text_input)
        with gr.Column():
            # Right column: verdict text, class probabilities, SHAP bar chart.
            label_out = gr.Textbox(label="Result", interactive=False, lines=2)
            prob_out = gr.Label(num_top_classes=2, label="Confidence")
            shap_out = gr.Plot(label="SHAP Explanation")
    gr.Markdown("> ⚠️ Research tool only. Not for production moderation decisions.")
    # Both the button click and pressing Enter in the textbox run the classifier.
    submit_btn.click(classify_text, inputs=text_input, outputs=[label_out, prob_out, shap_out])
    text_input.submit(classify_text, inputs=text_input, outputs=[label_out, prob_out, shap_out])

demo.launch()