jl committed on
Commit
34b86dc
·
0 Parent(s):

prototype initial

Browse files
Files changed (7) hide show
  1. .gitignore +10 -0
  2. .python-version +1 -0
  3. README.md +0 -0
  4. app.py +27 -0
  5. hatespeech_model.py +52 -0
  6. pyproject.toml +12 -0
  7. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.14
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front-end for the prototype hatespeech classifier.

Renders a text box, classifies the input with ``predict_hatespeech`` on
button press, and shows mocked probability / per-word importance values
(random numbers — placeholders until real model scores are wired in).
"""
import random

import streamlit as st

from hatespeech_model import predict_hatespeech

# Basic page chrome.
st.set_page_config(page_title="Hatespeech Classifier", layout="centered")
st.title("Hatespeech Text Classifier")
st.write("Enter text below to classify if it is hatespeech or not.")

user_input = st.text_area("Text to classify", "")

if st.button("Classify"):
    if user_input.strip():
        result = predict_hatespeech(user_input)
        st.markdown(f"**Result:** {result}")

        # Placeholder per-word "importance" scores (random for the prototype).
        # Built only when needed, from unique non-empty tokens. Both table
        # columns below are derived from this one dict so their lengths always
        # match — the original zipped the raw split (with duplicates/empties)
        # against the dict's values, which crashes st.table on repeated words
        # or double spaces.
        word_probabilities = {
            word: round(random.uniform(0, 1), 2)
            for word in user_input.split(" ")
            if word
        }

        col1, col2 = st.columns(2)
        col1.title("Shield Model Results")
        col2.title("Interpretable Shield Model Results")
        col1.write(f"**Result:** {result} ")
        col1.write(f"**Probability:** {random.uniform(0, 1)} ")
        col2.write(f"**Result:** {result}")
        col2.write(f"**Probability:** {random.uniform(0, 1)} ")
        col2.table({
            "Feature": list(word_probabilities.keys()),
            "Importance": list(word_probabilities.values()),
        })
    else:
        st.warning("Please enter some text to classify.")
hatespeech_model.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Toy Naive Bayes hatespeech classifier trained on a tiny in-module corpus.

Everything (download, training) runs at import time so ``predict_hatespeech``
is ready to call immediately — acceptable for a prototype, but note the
import side effects (network access via ``nltk.download``).
"""
import re
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
import os

# Download NLTK data if not already present
nltk.download('stopwords', quiet=True)
from nltk.corpus import stopwords

# Materialize the stopword set ONCE: stopwords.words('english') builds a
# fresh list on every call, so testing membership against it per token (as
# the original comprehension did) costs O(len(stopwords)) per word. A
# frozenset gives O(1) lookups.
_STOPWORDS = frozenset(stopwords.words('english'))

# Example training data (for demonstration)
data = {
    'text': [
        'I hate you',
        'You are so stupid',
        'Have a nice day',
        'I love this',
        'You are an idiot',
        'What a wonderful world',
        'You are disgusting',
        'Such a pleasant surprise',
        'I despise your actions',
        'You are amazing',
    ],
    'label': [1, 1, 0, 0, 1, 0, 1, 0, 1, 0]  # 1 = hatespeech, 0 = not
}
df = pd.DataFrame(data)

def preprocess(text):
    """Lowercase *text*, strip non-letter characters, and drop English
    stopwords; return the remaining tokens joined by single spaces."""
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = [t for t in text.split() if t not in _STOPWORDS]
    return ' '.join(tokens)

df['text_clean'] = df['text'].apply(preprocess)

# Vectorizer and model — fit on the demo corpus at import time.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['text_clean'])
y = df['label']

model = MultinomialNB()
model.fit(X, y)

def predict_hatespeech(text):
    """Classify *text* and return 'Hatespeech' or 'Not Hatespeech'."""
    text_clean = preprocess(text)
    X_test = vectorizer.transform([text_clean])
    pred = model.predict(X_test)[0]
    return 'Hatespeech' if pred == 1 else 'Not Hatespeech'
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "proto"
3
+ version = "0.1.0"
4
+ description = "Prototype Streamlit app for hatespeech text classification"
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ dependencies = [
8
+ "nltk>=3.9.2",
9
+ "pandas>=2.3.3",
10
+ "scikit-learn>=1.8.0",
11
+ "streamlit>=1.52.1",
12
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff