sgAtdbd committed on
Commit
33dfaba
·
verified ·
1 Parent(s): 249be5e

Update models/hate_speech_classifier.py

Browse files
Files changed (1) hide show
  1. models/hate_speech_classifier.py +123 -3
models/hate_speech_classifier.py CHANGED
@@ -204,15 +204,121 @@ class HateSpeechClassifier:
204
  print(f"❌ Error loading {model_key} pretrained model: {e}")
205
  model_info["pipeline"] = None
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  async def classify_with_custom_model(self, text: str, language: str) -> Dict:
208
  """Classify using language-specific custom model"""
209
  if language == "english":
210
  if not self.english_model_loaded:
 
211
  return None
212
  model = self.english_model
213
  vectorizer = self.english_vectorizer
214
  elif language == "bengali":
215
  if not self.bengali_model_loaded:
 
216
  return None
217
  model = self.bengali_model
218
  vectorizer = self.bengali_vectorizer
@@ -220,13 +326,25 @@ class HateSpeechClassifier:
220
  return None
221
 
222
  try:
 
 
 
 
223
  X = vectorizer.transform([text])
224
  prediction = model.predict(X)[0]
225
 
 
 
226
  if hasattr(model, 'predict_proba'):
227
  probabilities = model.predict_proba(X)[0]
228
- confidence = float(max(probabilities))
 
 
 
 
 
229
  else:
 
230
  confidence = 0.75
231
 
232
  if language == "english":
@@ -246,12 +364,14 @@ class HateSpeechClassifier:
246
  "category": category,
247
  "confidence": confidence,
248
  "method": f"custom_model_{language}",
249
- "raw_prediction": int(prediction)
 
250
  }
251
  except Exception as e:
252
  print(f"❌ Custom model classification failed: {e}")
 
 
253
  return None
254
-
255
  async def classify_with_pretrained_model(self, text: str, language: str = "english") -> Dict:
256
  """Classify using ensemble of pretrained models with translation support"""
257
 
 
204
  print(f"❌ Error loading {model_key} pretrained model: {e}")
205
  model_info["pipeline"] = None
206
 
207
+ # async def classify_with_custom_model(self, text: str, language: str) -> Dict:
208
+ # """Classify using language-specific custom model"""
209
+ # if language == "english":
210
+ # if not self.english_model_loaded:
211
+ # return None
212
+ # model = self.english_model
213
+ # vectorizer = self.english_vectorizer
214
+ # elif language == "bengali":
215
+ # if not self.bengali_model_loaded:
216
+ # return None
217
+ # model = self.bengali_model
218
+ # vectorizer = self.bengali_vectorizer
219
+ # else:
220
+ # return None
221
+
222
+ # try:
223
+ # X = vectorizer.transform([text])
224
+ # prediction = model.predict(X)[0]
225
+
226
+ # if hasattr(model, 'predict_proba'):
227
+ # probabilities = model.predict_proba(X)[0]
228
+ # confidence = float(max(probabilities))
229
+ # else:
230
+ # confidence = 0.75
231
+
232
+ # if language == "english":
233
+ # if prediction == 0:
234
+ # category = "neutral"
235
+ # else:
236
+ # category = "hate_speech"
237
+ # else:
238
+ # if prediction == 0:
239
+ # category = "neutral"
240
+ # elif prediction == 1:
241
+ # category = "offensive"
242
+ # else:
243
+ # category = "hate_speech"
244
+
245
+ # return {
246
+ # "category": category,
247
+ # "confidence": confidence,
248
+ # "method": f"custom_model_{language}",
249
+ # "raw_prediction": int(prediction)
250
+ # }
251
+ # except Exception as e:
252
+ # print(f"❌ Custom model classification failed: {e}")
253
+ # return None
254
+ # async def classify_with_custom_model(self, text: str, language: str) -> Dict:
255
+ # """Classify using language-specific custom model"""
256
+ # if language == "english":
257
+ # if not self.english_model_loaded:
258
+ # return None
259
+ # model = self.english_model
260
+ # vectorizer = self.english_vectorizer
261
+ # elif language == "bengali":
262
+ # if not self.bengali_model_loaded:
263
+ # return None
264
+ # model = self.bengali_model
265
+ # vectorizer = self.bengali_vectorizer
266
+ # else:
267
+ # return None
268
+
269
+ # try:
270
+ # X = vectorizer.transform([text])
271
+ # prediction = model.predict(X)[0]
272
+
273
+ # if hasattr(model, 'predict_proba'):
274
+ # probabilities = model.predict_proba(X)[0]
275
+ # # ✅ FIX: Use probability of the PREDICTED class, not max
276
+ # confidence = float(probabilities[prediction])
277
+
278
+ # # Debug logging
279
+ # print(f"🔍 Custom Model Debug:")
280
+ # print(f" Prediction: {prediction}")
281
+ # print(f" Probabilities: {probabilities}")
282
+ # print(f" Confidence: {confidence:.4f}")
283
+ # else:
284
+ # confidence = 0.75
285
+
286
+ # if language == "english":
287
+ # if prediction == 0:
288
+ # category = "neutral"
289
+ # else:
290
+ # category = "hate_speech"
291
+ # else:
292
+ # if prediction == 0:
293
+ # category = "neutral"
294
+ # elif prediction == 1:
295
+ # category = "offensive"
296
+ # else:
297
+ # category = "hate_speech"
298
+
299
+ # return {
300
+ # "category": category,
301
+ # "confidence": confidence,
302
+ # "method": f"custom_model_{language}",
303
+ # "raw_prediction": int(prediction),
304
+ # "probabilities": probabilities.tolist() if hasattr(model, 'predict_proba') else None
305
+ # }
306
+ # except Exception as e:
307
+ # print(f"❌ Custom model classification failed: {e}")
308
+ # import traceback
309
+ # traceback.print_exc()
310
+ # return None
311
  async def classify_with_custom_model(self, text: str, language: str) -> Dict:
312
  """Classify using language-specific custom model"""
313
  if language == "english":
314
  if not self.english_model_loaded:
315
+ print("❌ English model not loaded, returning None")
316
  return None
317
  model = self.english_model
318
  vectorizer = self.english_vectorizer
319
  elif language == "bengali":
320
  if not self.bengali_model_loaded:
321
+ print("❌ Bengali model not loaded, returning None")
322
  return None
323
  model = self.bengali_model
324
  vectorizer = self.bengali_vectorizer
 
326
  return None
327
 
328
  try:
329
+ # Debug: Check model type
330
+ print(f"πŸ” Model type: {type(model)}")
331
+ print(f"πŸ” Has predict_proba: {hasattr(model, 'predict_proba')}")
332
+
333
  X = vectorizer.transform([text])
334
  prediction = model.predict(X)[0]
335
 
336
+ print(f"πŸ” Raw prediction: {prediction}")
337
+
338
  if hasattr(model, 'predict_proba'):
339
  probabilities = model.predict_proba(X)[0]
340
+ confidence = float(probabilities[prediction])
341
+
342
+ print(f"πŸ” Custom Model Debug:")
343
+ print(f" Prediction: {prediction}")
344
+ print(f" Probabilities: {probabilities}")
345
+ print(f" Confidence (probabilities[{prediction}]): {confidence:.4f}")
346
  else:
347
+ print("⚠️ Model doesn't have predict_proba, using fallback 0.75")
348
  confidence = 0.75
349
 
350
  if language == "english":
 
364
  "category": category,
365
  "confidence": confidence,
366
  "method": f"custom_model_{language}",
367
+ "raw_prediction": int(prediction),
368
+ "probabilities": probabilities.tolist() if hasattr(model, 'predict_proba') else None
369
  }
370
  except Exception as e:
371
  print(f"❌ Custom model classification failed: {e}")
372
+ import traceback
373
+ traceback.print_exc()
374
  return None
 
375
  async def classify_with_pretrained_model(self, text: str, language: str = "english") -> Dict:
376
  """Classify using ensemble of pretrained models with translation support"""
377