Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,81 +1,49 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import joblib
|
| 3 |
import re
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
)
|
| 25 |
-
self.tfidf.idf_ = model_data['tfidf_idf']
|
| 26 |
-
self.classifier = model_data['classifier']
|
| 27 |
-
self.classes = model_data['classes']
|
| 28 |
-
|
| 29 |
-
def preprocess_text(self, text):
    """Normalize raw input text.

    Non-string input yields an empty string. Otherwise the text is
    lowercased, ``http(s)://`` and ``www.`` links are removed, every
    character that is not a word character, whitespace, or one of ``!?.,``
    is replaced by a space, and whitespace runs collapse to single spaces.
    """
    if isinstance(text, str):
        lowered = text.lower()
        without_links = re.sub(r'https?://\S+|www\.\S+', '', lowered)
        symbols_blanked = re.sub(r'[^\w\s!?.,]', ' ', without_links)
        # split() with no argument also trims leading/trailing whitespace.
        return ' '.join(symbols_blanked.split())
    return ""
|
| 36 |
-
|
| 37 |
-
def predict(self, text):
    """Classify *text* and report per-class probabilities.

    Args:
        text: Raw input; it is passed through ``self.preprocess_text`` first.

    Returns:
        A ``(label, confidence, prob_dict)`` tuple. ``label`` is the display
        name of the predicted class, ``confidence`` is the probability of
        that class (``0.0`` if the predicted class is not listed in
        ``self.classes``), and ``prob_dict`` maps each display name to its
        probability. When preprocessing leaves nothing to classify, the
        result is ``("No valid text provided", 0.0, {})``.
    """
    processed_text = self.preprocess_text(text)
    if not processed_text.strip():
        return "No valid text provided", 0.0, {}

    features = self.tfidf.transform([processed_text])
    prediction = self.classifier.predict(features)[0]
    probabilities = self.classifier.predict_proba(features)[0]

    # Pair each probability with its class by position rather than using the
    # class value itself as an index, so non-contiguous class ids still work.
    # NOTE(review): assumes self.classes is in predict_proba column order
    # (i.e. it was saved from classifier.classes_) - confirm against training.
    prob_dict = {}
    confidence = 0.0
    for class_id, probability in zip(self.classes, probabilities):
        probability = float(probability)
        prob_dict[LABEL_NAMES.get(class_id, f"Class {class_id}")] = probability
        if class_id == prediction:
            confidence = probability

    label = LABEL_NAMES.get(prediction, f"Class {prediction}")
    return label, confidence, prob_dict
|
| 51 |
-
|
| 52 |
-
# Build the detector once at import time. If construction fails, the module
# still imports: `detector` is None and `model_loaded` is False.
try:
    detector = HateSpeechDetector()
except Exception as exc:  # not bare: KeyboardInterrupt/SystemExit must propagate
    # Surface the cause instead of discarding it silently.
    print(f"Model failed to load: {exc!r}")
    detector = None
    model_loaded = False
else:
    model_loaded = True
|
| 58 |
-
|
| 59 |
-
def analyze_text(text):
|
| 60 |
-
if not model_loaded:
|
| 61 |
-
return "Model failed to load."
|
| 62 |
-
if not text or not text.strip():
|
| 63 |
return "Please enter some text."
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
|
|
|
| 68 |
demo = gr.Interface(
|
| 69 |
-
fn=
|
| 70 |
-
inputs=gr.Textbox(
|
| 71 |
-
outputs=gr.
|
| 72 |
-
title="Hate Speech
|
|
|
|
| 73 |
examples=[
|
| 74 |
-
["I
|
| 75 |
-
["
|
| 76 |
-
["
|
| 77 |
]
|
| 78 |
)
|
| 79 |
|
| 80 |
if __name__ == "__main__":
|
| 81 |
-
demo.launch(
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import joblib
|
| 3 |
import re
|
| 4 |
+
import string
|
| 5 |
+
|
| 6 |
+
# 1. Load the model and vectorizer
# 'hate_speech_model.joblib' is opened by relative path, so it must be
# reachable from the directory the app is started in.
# NOTE(review): joblib files are pickle-based - only load trusted artifacts.
checkpoint = joblib.load('hate_speech_model.joblib')
model, tfidf = checkpoint['model'], checkpoint['tfidf']
|
| 11 |
+
|
| 12 |
+
# 2. Pre-processing function (must match the one used during training)
|
| 13 |
+
def clean_text(text):
    """Return *text* normalized for the TF-IDF vectorizer.

    The input is coerced to ``str`` and lowercased; any whitespace-delimited
    run starting at ``http`` or ``www`` is dropped; ``@mentions`` and bare
    ``#`` characters are removed; all ASCII punctuation is stripped; and
    whitespace runs are collapsed to single spaces.

    Note: the link patterns are not anchored to word starts, so ``www``
    inside a word (e.g. ``"awwww"``) is removed as well.
    """
    lowered = str(text).lower()
    no_links = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)
    no_tags = re.sub(r'\@\w+|\#','', no_links)
    punctuation_stripper = str.maketrans('', '', string.punctuation)
    return ' '.join(no_tags.translate(punctuation_stripper).split())
|
| 20 |
+
|
| 21 |
+
# 3. Prediction function
|
| 22 |
+
def predict(text):
    """Classify *text* and return the human-readable class label.

    Args:
        text: Raw user input from the interface.

    Returns:
        ``"Please enter some text."`` when the input is empty, only
        whitespace, or reduced to nothing by ``clean_text``; otherwise the
        label mapped from the model's prediction, or ``"Unknown"`` when the
        prediction is not one of the known class ids.
    """
    prompt = "Please enter some text."

    # Reject blank input before any cleaning or model work.
    if not text or not str(text).strip():
        return prompt

    cleaned_text = clean_text(text)
    # Input made only of links, mentions or punctuation cleans down to "";
    # classifying that empty document would return a meaningless label.
    if not cleaned_text:
        return prompt

    vectorized_text = tfidf.transform([cleaned_text])
    prediction = model.predict(vectorized_text)[0]

    # Map numerical class to label
    labels = {0: "Hate Speech", 1: "Offensive Language", 2: "Neither"}
    return labels.get(prediction, "Unknown")
|
| 33 |
|
| 34 |
+
# 4. Build Gradio Interface
# The widgets and sample inputs are named first so the Interface call below
# reads as a plain list of settings.
_text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text")
_result_output = gr.Label(label="Classification Result")
_sample_inputs = [
    ["I hope you have a wonderful day!"],
    ["You are so stupid and I hate you."],
    ["That person is a complete idiot."],
]

demo = gr.Interface(
    fn=predict,
    inputs=_text_input,
    outputs=_result_output,
    title="Hate Speech Detector",
    description="This model classifies text into Hate Speech, Offensive Language, or Neither.",
    examples=_sample_inputs,
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|