Spaces:

VoltIC
/

Hate_speech_detector

Sleeping

App Files Files Community

VoltIC committed on Jan 3

Commit

3082536

verified ·

1 Parent(s): 31d5b26

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -69

app.py CHANGED Viewed

@@ -1,81 +1,49 @@
 import gradio as gr
 import joblib
 import re
-from sklearn.feature_extraction.text import TfidfVectorizer
-# Display names for the numeric class ids produced by the classifier.
-LABEL_NAMES = {
-    0: "Hate Speech",
-    1: "Offensive Language",
-    2: "Neutral"
-}
-class HateSpeechDetector:
-    """Classify text with a TF-IDF vectorizer and classifier restored from disk.
-
-    The joblib bundle is read as a dict with the keys 'tfidf_vocabulary',
-    'tfidf_idf', 'classifier' and 'classes', plus the optional settings
-    'tfidf_ngram_range' and 'tfidf_max_features'.
-    """
-    def __init__(self, model_path='hate_speech_model_compatible.joblib'):
-        """Load the bundle at ``model_path`` and rebuild the vectorizer from it."""
-        # NOTE: joblib.load unpickles the file; only load a trusted bundle.
-        model_data = joblib.load(model_path)
-        ngram_range = model_data.get('tfidf_ngram_range', (1, 3))
-        max_features = model_data.get('tfidf_max_features', 10000)
-        self.tfidf = TfidfVectorizer(
-            vocabulary=model_data['tfidf_vocabulary'],
-            ngram_range=ngram_range,
-            max_features=max_features,
-            lowercase=True,
-            analyzer='word'
-        )
-        # Restore the stored IDF weights instead of refitting the vectorizer.
-        self.tfidf.idf_ = model_data['tfidf_idf']
-        self.classifier = model_data['classifier']
-        self.classes = model_data['classes']
-    def preprocess_text(self, text):
-        """Lower-case ``text``, drop URLs and most symbols, collapse whitespace.
-
-        Word characters, whitespace and the marks ``! ? . ,`` are kept.
-        Non-string input yields an empty string.
-        """
-        if not isinstance(text, str):
-            return ""
-        text = text.lower()
-        text = re.sub(r'https?://\S+|www\.\S+', '', text)
-        text = re.sub(r'[^\w\s!?.,]', ' ', text)
-        return ' '.join(text.split())
-    def predict(self, text):
-        """Return ``(label, confidence, probabilities_by_label)`` for ``text``.
-
-        When nothing is left after preprocessing the result is
-        ``("No valid text provided", 0.0, {})``.
-        """
-        processed_text = self.preprocess_text(text)
-        if not processed_text.strip():
-            return "No valid text provided", 0.0, {}
-        features = self.tfidf.transform([processed_text])
-        prediction = self.classifier.predict(features)[0]
-        probabilities = self.classifier.predict_proba(features)[0]
-        # Pair every probability column with its class id instead of assuming
-        # the ids are exactly 0..n-1: indexing by the predicted id or by
-        # column position breaks as soon as the ids are anything else.
-        # Prefer the classifier's own ordering; fall back to the stored list.
-        class_ids = getattr(self.classifier, 'classes_', self.classes)
-        prob_dict = {}
-        confidence = 0.0
-        for class_id, probability in zip(class_ids, probabilities):
-            name = LABEL_NAMES.get(class_id, f"Class {class_id}")
-            prob_dict[name] = float(probability)
-            if class_id == prediction:
-                confidence = float(probability)
-        label = LABEL_NAMES.get(prediction, f"Class {prediction}")
-        return label, confidence, prob_dict
-# Build the detector once at import time. A failure must not stop the app
-# from starting (analyze_text reports it to the user), but the cause is
-# printed so it shows up in the logs instead of being silently dropped.
-try:
-    detector = HateSpeechDetector()
-    model_loaded = True
-except Exception as exc:
-    print(f"Failed to load hate speech model: {exc!r}")
-    detector = None
-    model_loaded = False
-def analyze_text(text):
-    """Format the detector's verdict on ``text`` for the result box.
-
-    Returns a fixed message when the model is unavailable or the input is
-    blank; otherwise a two-line "Prediction / Confidence" summary.
-    """
-    if not model_loaded:
-        return "Model failed to load."
-    is_blank = not text or not text.strip()
-    if is_blank:
         return "Please enter some text."
-    # The per-label probabilities are not shown in the UI.
-    label, confidence, _probabilities = detector.predict(text)
-    return f"Prediction: {label}\nConfidence: {confidence:.1%}"
-# Gradio UI: a 5-line text box feeds analyze_text; the result is shown in a text box.
 demo = gr.Interface(
-    fn=analyze_text,
-    inputs=gr.Textbox(label="Enter text to analyze", lines=5),
-    outputs=gr.Textbox(label="Result"),
-    title="Hate Speech Detection",
-    # Sample inputs; presumably one per class the model predicts — TODO confirm.
     examples=[
-        ["I absolutely hate people like you, you should all die"],
-        ["This is so stupid, what a dumb idea from idiots"],
-        ["The weather is beautiful today, let's go for a walk in the park"]
     ]
 )
 if __name__ == "__main__":
-    # Listen on all network interfaces, port 7860.
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import joblib
 import re
+import string
+# 1. Load the model and vectorizer
+# The path is relative, so 'hate_speech_model.joblib' must be in the working
+# directory the app is started from.
+# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
+# only ship a trusted artifact.
+# The bundle is indexed like a dict: 'model' is used for predict() and
+# 'tfidf' for transform() further down.
+checkpoint = joblib.load('hate_speech_model.joblib')
+model = checkpoint['model']
+tfidf = checkpoint['tfidf']
+# 2. Pre-processing function (must match the one used during training)
+def clean_text(text):
+    """Normalise raw input into the form passed to the vectorizer.
+
+    Steps, in order: stringify and lower-case, remove URLs, remove @mentions
+    and '#' signs, strip ASCII punctuation, collapse runs of whitespace.
+    """
+    lowered = str(text).lower()
+    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)
+    without_tags = re.sub(r'\@\w+|\#','', without_urls)
+    punctuation_table = str.maketrans('', '', string.punctuation)
+    stripped = without_tags.translate(punctuation_table)
+    return ' '.join(stripped.split())
+# 3. Prediction function
+def predict(text):
+    """Classify ``text`` and return the name of the predicted class.
+
+    Blank input (empty, ``None`` or whitespace only) returns a prompt
+    instead of being sent to the model. A class id without a known name
+    maps to "Unknown".
+    """
+    # Whitespace-only input has nothing to classify, so treat it like
+    # empty input rather than returning a label for it.
+    if not text or not str(text).strip():
         return "Please enter some text."
+    cleaned_text = clean_text(text)
+    vectorized_text = tfidf.transform([cleaned_text])
+    prediction = model.predict(vectorized_text)[0]
+    # Map numerical class to label
+    labels = {0: "Hate Speech", 1: "Offensive Language", 2: "Neither"}
+    return labels.get(prediction, "Unknown")
+# 4. Build Gradio Interface
 demo = gr.Interface(
+    # predict() returns a single label string, shown in a Label component.
+    fn=predict,
+    inputs=gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text"),
+    outputs=gr.Label(label="Classification Result"),
+    title="Hate Speech Detector",
+    description="This model classifies text into Hate Speech, Offensive Language, or Neither.",
+    # Sample inputs offered in the UI.
     examples=[
+        ["I hope you have a wonderful day!"],
+        ["You are so stupid and I hate you."],
+        ["That person is a complete idiot."]
     ]
 )
 if __name__ == "__main__":
+    # No host/port overrides: launch with Gradio's defaults.
+    demo.launch()