VoltIC committed on
Commit
3082536
·
verified ·
1 Parent(s): 31d5b26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -69
app.py CHANGED
@@ -1,81 +1,49 @@
1
  import gradio as gr
2
  import joblib
3
  import re
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
-
6
# Human-readable names for the numeric class ids the classifier emits.
LABEL_NAMES = {
    0: "Hate Speech",
    1: "Offensive Language",
    2: "Neutral",
}


class HateSpeechDetector:
    """Text classifier rebuilt from a joblib bundle of TF-IDF parts and a model.

    The bundle is a dict read with the keys ``tfidf_vocabulary``,
    ``tfidf_idf``, ``classifier`` and ``classes`` (required) plus
    ``tfidf_ngram_range`` and ``tfidf_max_features`` (optional).
    """

    def __init__(self, model_path='hate_speech_model_compatible.joblib'):
        """Load the bundle at *model_path* and rebuild the vectorizer.

        Raises whatever ``joblib.load`` raises for a missing/corrupt file and
        ``KeyError`` when a required key is absent from the bundle.
        """
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        model_data = joblib.load(model_path)
        ngram_range = model_data.get('tfidf_ngram_range', (1, 3))
        max_features = model_data.get('tfidf_max_features', 10000)

        self.tfidf = TfidfVectorizer(
            vocabulary=model_data['tfidf_vocabulary'],
            ngram_range=ngram_range,
            max_features=max_features,
            lowercase=True,
            analyzer='word',
        )
        # Inject the stored IDF weights instead of re-fitting on a corpus.
        self.tfidf.idf_ = model_data['tfidf_idf']
        self.classifier = model_data['classifier']
        # Presumably the classifier's class ids in predict_proba column
        # order -- TODO confirm against the training script.
        self.classes = model_data['classes']

    def preprocess_text(self, text):
        """Return *text* lower-cased with URLs and most symbols removed.

        Non-string input yields ``""``. Word characters, whitespace and the
        punctuation ``! ? . ,`` are kept; every other character becomes a
        space, and runs of whitespace are collapsed to single spaces.
        """
        if not isinstance(text, str):
            return ""
        text = text.lower()
        text = re.sub(r'https?://\S+|www\.\S+', '', text)
        text = re.sub(r'[^\w\s!?.,]', ' ', text)
        return ' '.join(text.split())

    def predict(self, text):
        """Classify *text*.

        Returns a ``(label, confidence, probabilities)`` tuple where
        ``probabilities`` maps each label name to its probability and
        ``confidence`` is the probability of the predicted class. When no
        text survives preprocessing the result is
        ``("No valid text provided", 0.0, {})``.
        """
        processed_text = self.preprocess_text(text)
        if not processed_text.strip():
            return "No valid text provided", 0.0, {}

        features = self.tfidf.transform([processed_text])
        prediction = self.classifier.predict(features)[0]
        probabilities = self.classifier.predict_proba(features)[0]

        # Pair every probability with its class id rather than assuming the
        # ids are exactly 0..n-1: positional indexing mislabels (or raises
        # KeyError/IndexError) whenever the class set is not contiguous.
        prob_dict = {}
        confidence = 0.0
        for class_id, probability in zip(self.classes, probabilities):
            name = LABEL_NAMES.get(class_id, f"Class {class_id}")
            prob_dict[name] = float(probability)
            if class_id == prediction:
                confidence = float(probability)
        label = LABEL_NAMES.get(prediction, f"Class {prediction}")

        return label, confidence, prob_dict
51
-
52
# Build the detector once at import time. A load failure is recorded in
# `model_loaded` rather than crashing the app, so the UI can still start.
try:
    detector = HateSpeechDetector()
    model_loaded = True
except Exception as exc:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; print the cause so it reaches the logs.
    print(f"Failed to load hate speech model: {exc!r}")
    detector = None
    model_loaded = False
58
-
59
def analyze_text(text):
    """Return a two-line prediction/confidence summary for *text*.

    Short-circuits with a fixed message when the model is unavailable or
    when *text* is empty or whitespace-only.
    """
    if not model_loaded:
        return "Model failed to load."

    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return "Please enter some text."

    # The third element (per-class probabilities) is not shown in the UI.
    label, confidence, _ = detector.predict(text)
    summary = f"Prediction: {label}\nConfidence: {confidence:.1%}"
    return summary
 
 
 
 
 
67
 
 
68
# Gradio front end: one multi-line text box in, one text box out.
demo = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(label="Enter text to analyze", lines=5),
    outputs=gr.Textbox(label="Result"),
    title="Hate Speech Detection",
    # One clickable sample input per row.
    examples=[
        ["I absolutely hate people like you, you should all die"],
        ["This is so stupid, what a dumb idea from idiots"],
        ["The weather is beautiful today, let's go for a walk in the park"],
    ],
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import joblib
3
  import re
4
+ import string
5
+
6
# 1. Load the serialized bundle holding the classifier and its vectorizer.
# The relative path is resolved against the current working directory, so
# 'hate_speech_model.joblib' must be present there at start-up.
# NOTE(review): joblib.load unpickles the file; only load trusted files.
checkpoint = joblib.load('hate_speech_model.joblib')
model, tfidf = checkpoint['model'], checkpoint['tfidf']
11
+
12
# 2. Pre-processing function (must match the one used during training)
# The patterns and the translation table are built once at import time
# instead of on every call.
# `http\S+` already covers every `https...` token, so no separate
# alternative is needed; no anchors are used, so no MULTILINE flag either.
_URL_RE = re.compile(r'http\S+|www\S+')
# Drops whole @mentions but only the '#' sign of a hashtag (the word stays).
_MENTION_HASH_RE = re.compile(r'@\w+|#')
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalize *text* for the TF-IDF vectorizer.

    The input is coerced with ``str()``, lower-cased, stripped of URL-like
    tokens, @mentions, '#' signs and ASCII punctuation, and has its
    whitespace collapsed to single spaces. Output is identical to the
    previous inline-regex implementation.
    """
    text = str(text).lower()
    text = _URL_RE.sub('', text)
    text = _MENTION_HASH_RE.sub('', text)
    text = text.translate(_PUNCT_TABLE)
    return ' '.join(text.split())
20
+
21
# 3. Prediction function
def predict(text):
    """Classify *text* and return its label name.

    Returns "Please enter some text." when the input is empty,
    whitespace-only, or reduced to nothing by ``clean_text`` (for example
    only URLs, mentions or punctuation). Returns "Unknown" when the model
    emits a class id that has no entry in the label table.
    """
    if not text or not str(text).strip():
        return "Please enter some text."

    cleaned_text = clean_text(text)
    if not cleaned_text:
        # Nothing survived cleaning; classifying an empty document would
        # yield an arbitrary label, so ask for real input instead.
        return "Please enter some text."

    vectorized_text = tfidf.transform([cleaned_text])
    prediction = model.predict(vectorized_text)[0]

    # Map numerical class to label
    labels = {0: "Hate Speech", 1: "Offensive Language", 2: "Neither"}
    return labels.get(prediction, "Unknown")
33
 
34
# 4. Build Gradio Interface
# A two-line text box feeds `predict`; the result is shown in a Label.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter text here...",
        label="Input Text",
    ),
    outputs=gr.Label(label="Classification Result"),
    title="Hate Speech Detector",
    description="This model classifies text into Hate Speech, Offensive Language, or Neither.",
    # One clickable sample input per row.
    examples=[
        ["I hope you have a wonderful day!"],
        ["You are so stupid and I hate you."],
        ["That person is a complete idiot."],
    ],
)

if __name__ == "__main__":
    # Start the app with Gradio's default launch settings.
    demo.launch()