File size: 3,608 Bytes

1ac7b0c

#!/usr/bin/env python3
"""
Hugging Face compatible inference for content moderation
"""
import pickle
from huggingface_hub import hf_hub_download
from enum import Enum

class AgeMode(Enum):
    """
    Filtering mode chosen from the user's age.

    The member values are the strings reported in the ``mode`` field of the
    dict returned by ``DualModeFilter.check``.
    """
    # Used by DualModeFilter.check when age < 13.
    UNDER_13 = "under_13"
    # Used by DualModeFilter.check when age >= 13.
    TEEN_PLUS = "teen_plus"

class ContentLabel(Enum):
    """
    Categories that can be assigned to a piece of text.

    ``DualModeFilter.predict`` builds a member directly from the model's raw
    prediction (``ContentLabel(prediction)``), so these integer values must
    match the class ids the loaded model emits. The same integers appear in
    the filter's per-age blocked lists.
    """
    # Not present in either blocked list.
    SAFE = 0
    HARASSMENT = 1
    # In the under-13 blocked list only; not in the 13+ blocked list.
    SWEARING_REACTION = 2
    SWEARING_AGGRESSIVE = 3
    HATE_SPEECH = 4
    SPAM = 5

class DualModeFilter:
    """
    Dual-mode (under-13 / 13+) content filter backed by a Hugging Face model.

    The constructor downloads ``moderation_model.pkl`` from the given Hub
    repository and unpickles it into ``self.pipeline``. The pipeline is only
    used through its ``predict`` and ``predict_proba`` methods.

    Usage:
        moderator = DualModeFilter()  # or DualModeFilter("owner/repo")
        result = moderator.check("text here", age=15)
    """

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """
        Download the pickled model from the Hub and load it.

        Args:
            repo_id: Hugging Face Hub repository containing
                ``moderation_model.pkl``.
            token: Optional access token forwarded to ``hf_hub_download``.
        """
        # Download model from HF
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token
        )

        # Load model
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only point repo_id at repositories you trust.
        with open(model_path, 'rb') as f:
            self.pipeline = pickle.load(f)

        # ContentLabel values blocked in each mode. The only difference is
        # SWEARING_REACTION (2), which is blocked for under-13 users only.
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [label.name for label in ContentLabel]

    def predict(self, text):
        """
        Predict the label for a single text.

        Args:
            text: Text to classify.

        Returns:
            Tuple ``(ContentLabel, confidence)`` where confidence is the
            largest value returned by the pipeline's ``predict_proba``.
        """
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        # Normalise to a plain float so the result does not depend on the
        # numeric type the model uses for probabilities.
        confidence = float(max(probs))
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """
        Check content against age-appropriate filters

        Args:
            text: Text to check
            age: User age (13 and over uses the laxer teen-plus mode,
                below 13 uses the strict under-13 mode)

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'.
            When confidence is below 0.5 the label is "UNCERTAIN" and the
            content is allowed with a manual-review reason.
        """
        label, confidence = self.predict(text)
        is_teen_plus = age >= 13
        mode = AgeMode.TEEN_PLUS if is_teen_plus else AgeMode.UNDER_13

        # Low confidence check
        # NOTE(review): uncertain predictions are let through (fail-open);
        # confirm this is the intended policy for both age modes.
        if confidence < 0.5:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended"
            }

        # Check if blocked for this age
        blocked = self.teen_plus_blocked if is_teen_plus else self.under_13_blocked
        allowed = label.value not in blocked

        if label == ContentLabel.SWEARING_REACTION and is_teen_plus:
            # Reaction swearing is always permitted for 13+. This used to sit
            # behind "if not allowed", which the default blocked list never
            # triggers, so the explanatory reason was never reported.
            allowed = True
            reason = "Swearing permitted as reaction (13+)"
        elif allowed:
            reason = "Safe"
        else:
            reason = f"{label.name} detected"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason
        }

# Example usage: run a handful of sample phrases through the filter
if __name__ == "__main__":
    print("Testing Dual-Mode Content Filter")
    print("=" * 50)

    # Constructing the filter downloads the model from the Hugging Face Hub.
    moderator = DualModeFilter()

    # (text, age) pairs; ages 10 and 15 fall on either side of the
    # 13-year threshold used by DualModeFilter.check.
    samples = (
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    )

    for text, age in samples:
        result = moderator.check(text, age)
        if result["allowed"]:
            status = "✅ ALLOWED"
        else:
            status = "❌ BLOCKED"
        print(f"\nAge {age}: '{text}'")
        print(f"  {status} - {result['reason']}")
        print(f"  Confidence: {result['confidence']:.2f}")