# prof-demo/src/core/ai_classifier.py
from typing import Optional
from .classifier import ToxicityLevel
from ..utils.config import config
class AIClassifier:
"""AI-powered toxicity classifier using Hugging Face models."""
def __init__(self):
self.model = None
self.tokenizer = None
self._initialized = False
def _initialize(self):
"""Lazy load the model to avoid startup delays."""
if self._initialized:
return
try:
from transformers import pipeline
# Use a toxicity detection model
# This model works without authentication
self.model = pipeline(
"text-classification",
model="unitary/toxic-bert",
top_k=None,
token=config.HUGGINGFACE_TOKEN
)
self._initialized = True
print("✓ AI Classifier initialized with toxic-bert model")
except Exception as e:
print(f"⚠ Could not initialize AI model: {e}")
print(" Falling back to rule-based classification")
self._initialized = False
def classify(self, text: str) -> tuple[ToxicityLevel, dict]:
"""
Classify text using AI model.
Returns:
Tuple of (ToxicityLevel, confidence_scores)
"""
self._initialize()
if not self._initialized or self.model is None:
# Fallback to basic classification
return ToxicityLevel.SAFE, {}
try:
results = self.model(text)[0]
# toxic-bert returns labels like 'toxic', 'severe_toxic', 'obscene', etc.
scores = {item['label']: item['score'] for item in results}
# Determine toxicity level based on scores
if scores.get('severe_toxic', 0) > 0.5:
return ToxicityLevel.THREAT, scores
elif scores.get('obscene', 0) > 0.5:
return ToxicityLevel.EXPLICIT, scores
elif scores.get('insult', 0) > 0.4:
return ToxicityLevel.SLUR, scores
elif scores.get('toxic', 0) > 0.3:
return ToxicityLevel.MILD, scores
else:
return ToxicityLevel.SAFE, scores
except Exception as e:
print(f"Error during AI classification: {e}")
return ToxicityLevel.SAFE, {}