from typing import Optional

from .classifier import ToxicityLevel
from ..utils.config import config


class AIClassifier:
    """AI-powered toxicity classifier using Hugging Face models.

    The underlying ``unitary/toxic-bert`` pipeline is loaded lazily on the
    first call to :meth:`classify`.  If the model cannot be loaded (e.g.
    ``transformers`` is not installed or the download fails), the classifier
    degrades gracefully and :meth:`classify` returns a safe fallback.
    """

    def __init__(self):
        # Pipeline object and tokenizer are populated by _initialize().
        self.model = None
        self.tokenizer = None
        self._initialized = False
        # Set when initialization has been attempted and failed, so we do
        # not re-attempt the (expensive) model load on every classify() call.
        self._init_failed = False

    def _initialize(self) -> None:
        """Lazy load the model to avoid startup delays.

        Idempotent: returns immediately if the model is already loaded, and
        does not retry after a previous failed attempt.
        """
        if self._initialized or self._init_failed:
            return
        try:
            from transformers import pipeline

            # Use a toxicity detection model
            # This model works without authentication
            self.model = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                top_k=None,
                token=config.HUGGINGFACE_TOKEN,
            )
            self._initialized = True
            print("✓ AI Classifier initialized with toxic-bert model")
        except Exception as e:
            # Broad catch is deliberate: model loading can fail for many
            # reasons (missing dependency, network, auth); we fall back to
            # rule-based classification rather than crash the caller.
            print(f"⚠ Could not initialize AI model: {e}")
            print("  Falling back to rule-based classification")
            self._initialized = False
            self._init_failed = True

    def classify(self, text: str) -> tuple[ToxicityLevel, dict]:
        """Classify text using the AI model.

        Args:
            text: The text to classify.

        Returns:
            Tuple of (ToxicityLevel, confidence_scores).  The scores dict
            maps model label names (e.g. 'toxic', 'insult') to confidence
            floats; it is empty when the model is unavailable or errors.
        """
        self._initialize()

        if not self._initialized or self.model is None:
            # Fallback to basic classification
            return ToxicityLevel.SAFE, {}

        try:
            results = self.model(text)[0]

            # toxic-bert returns labels like 'toxic', 'severe_toxic', 'obscene', etc.
            scores = {item['label']: item['score'] for item in results}

            # Determine toxicity level based on scores.  Checks run from most
            # to least severe so the strongest matching category wins.
            if scores.get('severe_toxic', 0) > 0.5:
                return ToxicityLevel.THREAT, scores
            elif scores.get('obscene', 0) > 0.5:
                return ToxicityLevel.EXPLICIT, scores
            elif scores.get('insult', 0) > 0.4:
                return ToxicityLevel.SLUR, scores
            elif scores.get('toxic', 0) > 0.3:
                return ToxicityLevel.MILD, scores
            else:
                return ToxicityLevel.SAFE, scores
        except Exception as e:
            # Inference failure (tokenization error, OOM, etc.) — degrade to
            # a safe result rather than propagate to the caller.
            print(f"Error during AI classification: {e}")
            return ToxicityLevel.SAFE, {}