#!/usr/bin/env python3
"""
Hugging Face compatible inference for content moderation
"""

import pickle
from enum import Enum

try:
    from huggingface_hub import hf_hub_download
except ImportError:
    # Only DualModeFilter.__init__ needs the hub client. Deferring the
    # failure lets the enums and the age-gating logic be imported (and
    # unit-tested) in environments where huggingface_hub is not installed.
    hf_hub_download = None


class AgeMode(Enum):
    """Filtering mode selected from the user's age."""

    UNDER_13 = "under_13"
    TEEN_PLUS = "teen_plus"


class ContentLabel(Enum):
    """Integer class labels that the moderation pipeline's output is mapped to."""

    SAFE = 0
    HARASSMENT = 1
    SWEARING_REACTION = 2
    SWEARING_AGGRESSIVE = 3
    HATE_SPEECH = 4
    SPAM = 5


class DualModeFilter:
    """
    Dual-mode content filter for Hugging Face

    Usage:
        moderator = DualModeFilter()  # or DualModeFilter("user/repo")
        result = moderator.check("text here", age=15)
    """

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """
        Download the pickled pipeline from the Hugging Face Hub and load it.

        Args:
            repo_id: Hub repository that contains "moderation_model.pkl"
            token: Optional access token forwarded to hf_hub_download

        Raises:
            ImportError: if huggingface_hub is not installed
        """
        if hf_hub_download is None:
            raise ImportError(
                "huggingface_hub is required to download the moderation model"
            )

        # Download model from HF
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token,
        )

        # Load model
        # SECURITY: unpickling can execute arbitrary code. Only point repo_id
        # at repositories you trust.
        with open(model_path, "rb") as f:
            self.pipeline = pickle.load(f)

        # ContentLabel values that are rejected in each mode. Label 2
        # (SWEARING_REACTION) is blocked for under-13 users only.
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [label.name for label in ContentLabel]

    def predict(self, text):
        """
        Predict label for text

        Args:
            text: Text to classify

        Returns:
            (ContentLabel, confidence) where confidence is the largest value
            returned by the pipeline's predict_proba for this text
        """
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        # Plain float so the value formats and serializes predictably
        # regardless of the numeric type the pipeline returns.
        confidence = float(max(probs))
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """
        Check content against age-appropriate filters

        Args:
            text: Text to check
            age: User age (determines strict vs laxed mode)

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'
        """
        label, confidence = self.predict(text)
        is_teen_plus = age >= 13
        mode = AgeMode.TEEN_PLUS if is_teen_plus else AgeMode.UNDER_13

        # Low confidence check
        # NOTE(review): this path lets the text through in both modes,
        # including under-13 - confirm that failing open is intended.
        if confidence < 0.5:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended",
            }

        if label == ContentLabel.SWEARING_REACTION and is_teen_plus:
            # Reaction swearing is always permitted for 13+. This is decided
            # before the blocked-list lookup: the default teen list does not
            # contain this label, so testing it only after a "blocked" result
            # made this reason unreachable and reported the text as "Safe".
            allowed = True
            reason = "Swearing permitted as reaction (13+)"
        else:
            # Check if blocked for this age
            blocked = (
                self.teen_plus_blocked if is_teen_plus else self.under_13_blocked
            )
            allowed = label.value not in blocked
            reason = "Safe" if allowed else f"{label.name} detected"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason,
        }


def main():
    """Run a few sample texts through the filter and print the verdicts."""
    print("Testing Dual-Mode Content Filter")
    print("="*50)

    # Initialize (downloads model from HF)
    filter_sys = DualModeFilter()

    tests = [
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    ]

    for text, age in tests:
        result = filter_sys.check(text, age)
        status = "✅ ALLOWED" if result["allowed"] else "❌ BLOCKED"
        print(f"\nAge {age}: '{text}'")
        print(f"  {status} - {result['reason']}")
        print(f"  Confidence: {result['confidence']:.2f}")


# Example usage
if __name__ == "__main__":
    main()