Spaces:
Running
Running
jl committed on
Commit ·
34b86dc
0
Parent(s):
prototype initial
Browse files- .gitignore +10 -0
- .python-version +1 -0
- README.md +0 -0
- app.py +27 -0
- hatespeech_model.py +52 -0
- pyproject.toml +12 -0
- uv.lock +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.14
|
README.md
ADDED
|
File without changes
|
app.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
from hatespeech_model import predict_hatespeech
import random

st.set_page_config(page_title="Hatespeech Classifier", layout="centered")
st.title("Hatespeech Text Classifier")
st.write("Enter text below to classify if it is hatespeech or not.")

user_input = st.text_area("Text to classify", "")

# Placeholder per-word "importances" until a real interpretable model is
# wired in. split() without an argument drops empty tokens, so repeated
# spaces or newlines in the input never produce empty-string keys.
words = user_input.split()
word_probabilities = {word: round(random.uniform(0, 1), 2) for word in words}

if st.button("Classify"):
    if user_input.strip():
        result = predict_hatespeech(user_input)
        st.markdown(f"**Result:** {result}")
        col1, col2 = st.columns(2)
        col1.title("Shield Model Results")
        col2.title("Interpretable Shield Model Results")
        col1.write(f"**Result:** {result} ")
        col1.write(f"**Probability:** {random.uniform(0, 1)} ")
        col2.write(f"**Result:** {result}")
        col2.write(f"**Probability:** {random.uniform(0, 1)} ")
        # BUG FIX: st.table requires equal-length columns. The old code
        # paired the raw split list (which kept empty strings and
        # duplicates) with the filtered/deduplicated dict's values, so the
        # lengths could differ and the call raised. Build both columns
        # from the same dict so they always align.
        col2.table({
            "Feature": list(word_probabilities.keys()),
            "Importance": list(word_probabilities.values()),
        })
    else:
        st.warning("Please enter some text to classify.")
|
hatespeech_model.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import nltk
|
| 3 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 4 |
+
from sklearn.naive_bayes import MultinomialNB
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Download NLTK data if not already present
|
| 9 |
+
nltk.download('stopwords', quiet=True)
|
| 10 |
+
from nltk.corpus import stopwords
|
| 11 |
+
|
| 12 |
+
# Example training data (for demonstration)
# Kept as (text, label) pairs so each example reads on one line.
_samples = [
    ('I hate you', 1),
    ('You are so stupid', 1),
    ('Have a nice day', 0),
    ('I love this', 0),
    ('You are an idiot', 1),
    ('What a wonderful world', 0),
    ('You are disgusting', 1),
    ('Such a pleasant surprise', 0),
    ('I despise your actions', 1),
    ('You are amazing', 0),
]
data = {
    'text': [text for text, _ in _samples],
    'label': [label for _, label in _samples],  # 1 = hatespeech, 0 = not
}
df = pd.DataFrame(data)
|
| 29 |
+
|
| 30 |
+
# Preprocessing function
def preprocess(text):
    """Normalize *text* for the bag-of-words model.

    Lowercases, strips every character that is not a letter or whitespace,
    and removes English stopwords. Returns the surviving tokens joined by
    single spaces.
    """
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # PERF FIX: the original evaluated stopwords.words('english') once per
    # token — rebuilding the whole list every iteration, then doing an O(n)
    # list membership test. Build the set once per call instead.
    stop_words = set(stopwords.words('english'))
    tokens = [t for t in text.split() if t not in stop_words]
    return ' '.join(tokens)
|
| 37 |
+
|
| 38 |
+
# Clean every training example with the same pipeline used at predict time.
df['text_clean'] = df['text'].apply(preprocess)

# Vectorizer and model
# Bag-of-words counts over the cleaned text; MultinomialNB is a standard
# choice for count features. Trained once at import time on the toy data.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['text_clean'])
y = df['label']

model = MultinomialNB()
model.fit(X, y)
|
| 47 |
+
|
| 48 |
+
def predict_hatespeech(text):
    """Classify *text* with the module-level model.

    Returns 'Hatespeech' for a positive (1) prediction, otherwise
    'Not Hatespeech'.
    """
    cleaned = preprocess(text)
    features = vectorizer.transform([cleaned])
    label = model.predict(features)[0]
    if label == 1:
        return 'Hatespeech'
    return 'Not Hatespeech'
|
pyproject.toml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "proto"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.14"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"nltk>=3.9.2",
|
| 9 |
+
"pandas>=2.3.3",
|
| 10 |
+
"scikit-learn>=1.8.0",
|
| 11 |
+
"streamlit>=1.52.1",
|
| 12 |
+
]
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|