|
|
from typing import Optional |
|
|
from .classifier import ToxicityLevel |
|
|
from ..utils.config import config |
|
|
|
|
|
class AIClassifier:
    """AI-powered toxicity classifier using Hugging Face models.

    The underlying ``transformers`` pipeline is loaded lazily on first
    use.  If loading fails (missing dependency, network/download error,
    bad token), the classifier degrades gracefully: ``classify`` returns
    ``ToxicityLevel.SAFE`` with empty scores so callers can fall back to
    rule-based classification.
    """

    def __init__(self):
        # Lazily-created transformers pipeline; None until _initialize()
        # succeeds.
        self.model = None
        # Reserved for a standalone tokenizer; currently unused.
        self.tokenizer = None
        # True once the model has been loaded successfully.
        self._initialized = False
        # True after a load attempt failed.  Prevents re-running the
        # expensive import/model download (and re-printing the warning)
        # on every classify() call.
        self._init_failed = False

    def _initialize(self):
        """Lazy load the model to avoid startup delays.

        Safe to call repeatedly: a no-op once the model has loaded, and
        also once a load attempt has failed (we do not retry).
        """
        if self._initialized or self._init_failed:
            return

        try:
            from transformers import pipeline

            # top_k=None makes the pipeline return scores for *all*
            # labels rather than only the best one, which classify()
            # relies on when building its score dict.
            self.model = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                top_k=None,
                token=config.HUGGINGFACE_TOKEN
            )
            self._initialized = True
            print("✓ AI Classifier initialized with toxic-bert model")
        except Exception as e:
            # Missing package, download failure, auth error, etc. —
            # record the failure so we only pay this cost once.
            print(f"⚠ Could not initialize AI model: {e}")
            print(" Falling back to rule-based classification")
            self._initialized = False
            self._init_failed = True

    def classify(self, text: str) -> tuple[ToxicityLevel, dict]:
        """
        Classify text using AI model.

        Args:
            text: The text to classify.

        Returns:
            Tuple of (ToxicityLevel, confidence_scores), where
            confidence_scores maps toxic-bert label names to floats.
            Returns (ToxicityLevel.SAFE, {}) when the model is
            unavailable or inference raises.
        """
        self._initialize()

        # Model unavailable — signal "safe" so the caller can apply its
        # rule-based fallback instead.
        if not self._initialized or self.model is None:
            return ToxicityLevel.SAFE, {}

        try:
            # With top_k=None the pipeline returns a list per input:
            # [[{'label': ..., 'score': ...}, ...]]; take the first
            # (only) input's results.
            results = self.model(text)[0]
            scores = {item['label']: item['score'] for item in results}

            # Map label scores to a severity level, checking the most
            # severe labels first so the strongest match wins.
            if scores.get('severe_toxic', 0) > 0.5:
                return ToxicityLevel.THREAT, scores
            elif scores.get('obscene', 0) > 0.5:
                return ToxicityLevel.EXPLICIT, scores
            elif scores.get('insult', 0) > 0.4:
                return ToxicityLevel.SLUR, scores
            elif scores.get('toxic', 0) > 0.3:
                return ToxicityLevel.MILD, scores
            else:
                return ToxicityLevel.SAFE, scores
        except Exception as e:
            # Inference failure — degrade to SAFE rather than crash the
            # caller's pipeline.
            print(f"Error during AI classification: {e}")
            return ToxicityLevel.SAFE, {}
|
|
|