darwinkernelpanic committed on
Commit
1ac7b0c
·
verified ·
1 Parent(s): 34526ab

Upload inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +121 -0
inference.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hugging Face compatible inference for content moderation
4
+ """
5
+ import pickle
6
+ from huggingface_hub import hf_hub_download
7
+ from enum import Enum
8
+
9
class AgeMode(Enum):
    """Filtering strictness tier, selected from the user's age."""

    # Strict tier applied to users younger than 13.
    UNDER_13 = "under_13"
    # Laxer tier applied to users aged 13 and up.
    TEEN_PLUS = "teen_plus"
12
+
13
class ContentLabel(Enum):
    """Moderation classes emitted by the model.

    Integer values mirror the class indices the pickled pipeline
    predicts, so ``ContentLabel(prediction)`` round-trips directly.
    """

    SAFE = 0
    HARASSMENT = 1
    SWEARING_REACTION = 2
    SWEARING_AGGRESSIVE = 3
    HATE_SPEECH = 4
    SPAM = 5
20
+
21
class DualModeFilter:
    """
    Dual-mode (age-aware) content filter backed by a model on the
    Hugging Face Hub.

    Usage:
        moderator = DualModeFilter("darwinkernelpanic/moderat")
        result = moderator.check("text here", age=15)
    """

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """Download and load the moderation pipeline.

        Args:
            repo_id: Hugging Face repo holding ``moderation_model.pkl``.
            token: Optional HF access token for private repos.
        """
        # Download model from HF (cached locally by huggingface_hub).
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token,
        )

        # SECURITY: pickle.load executes arbitrary code from the
        # downloaded artifact — only point repo_id at a trusted repo.
        with open(model_path, 'rb') as f:
            self.pipeline = pickle.load(f)

        # Label values blocked per age tier (kept as lists of ints for
        # backward compatibility with external readers of these attrs).
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [l.name for l in ContentLabel]

    def predict(self, text):
        """Return (ContentLabel, confidence) for a single text."""
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        confidence = max(probs)
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """
        Check content against age-appropriate filters.

        Args:
            text: Text to check
            age: User age (determines strict vs laxed mode)

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'
        """
        label, confidence = self.predict(text)
        mode = AgeMode.TEEN_PLUS if age >= 13 else AgeMode.UNDER_13

        # Low confidence check: pass through but flag for manual review.
        if confidence < 0.5:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended"
            }

        # Pick the blocklist from the already-computed mode instead of
        # re-testing the age threshold (keeps the two in sync).
        if mode is AgeMode.TEEN_PLUS:
            blocked = self.teen_plus_blocked
        else:
            blocked = self.under_13_blocked
        allowed = label.value not in blocked

        # BUGFIX: the original special-cased SWEARING_REACTION inside the
        # "not allowed" branch, but for teens that label is never blocked,
        # so the branch was unreachable and the explanatory reason was
        # never emitted. Surface it on the allowed path instead.
        if allowed and label == ContentLabel.SWEARING_REACTION and mode is AgeMode.TEEN_PLUS:
            reason = "Swearing permitted as reaction (13+)"
        elif not allowed:
            reason = f"{label.name} detected"
        else:
            reason = "Safe"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason
        }
98
+
99
# Example usage / smoke test when run as a script.
if __name__ == "__main__":
    print("Testing Dual-Mode Content Filter")
    print("=" * 50)

    # Initialize (downloads the model from the Hub on first use).
    moderation = DualModeFilter()

    cases = [
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    ]

    for message, user_age in cases:
        verdict = moderation.check(message, user_age)
        banner = "✅ ALLOWED" if verdict["allowed"] else "❌ BLOCKED"
        print(f"\nAge {user_age}: '{message}'")
        print(f" {banner} - {verdict['reason']}")
        print(f" Confidence: {verdict['confidence']:.2f}")