File size: 3,608 Bytes
1ac7b0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
#!/usr/bin/env python3
"""
Hugging Face compatible inference for content moderation
"""
import pickle
from huggingface_hub import hf_hub_download
from enum import Enum
class AgeMode(Enum):
    """Filtering mode chosen from the user's age.

    Each member's value is the string reported under the ``mode`` key of a
    check result.
    """

    # Users younger than 13.
    UNDER_13 = "under_13"

    # Users aged 13 or older.
    TEEN_PLUS = "teen_plus"
class ContentLabel(Enum):
    """Content categories, keyed by their integer label value."""

    SAFE = 0

    # Abusive content aimed at a person.
    HARASSMENT = 1

    # Two distinct swearing categories are kept so they can be filtered
    # differently.
    SWEARING_REACTION = 2
    SWEARING_AGGRESSIVE = 3

    HATE_SPEECH = 4
    SPAM = 5
class DualModeFilter:
    """Dual-mode content filter backed by a model hosted on Hugging Face.

    Users under 13 are checked against ``under_13_blocked``; users aged 13
    and over are checked against ``teen_plus_blocked``, which leaves
    SWEARING_REACTION (label value 2) unblocked.

    Usage:
        moderator = DualModeFilter()  # or DualModeFilter("owner/repo", token=...)
        result = moderator.check("text here", age=15)
    """

    # Predictions whose top class probability is below this value are
    # reported as "UNCERTAIN" instead of being judged.
    MIN_CONFIDENCE = 0.5

    def __init__(self, repo_id="darwinkernelpanic/moderat", token=None):
        """Download the pickled pipeline from the Hub and load it.

        Args:
            repo_id: Hugging Face repository that contains
                ``moderation_model.pkl``.
            token: Optional access token forwarded to ``hf_hub_download``.
        """
        # Download model from HF
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="moderation_model.pkl",
            token=token,
        )
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file. Only point ``repo_id`` at repositories you trust.
        with open(model_path, "rb") as f:
            self.pipeline = pickle.load(f)

        # ContentLabel values that are blocked in each age mode.
        self.under_13_blocked = [1, 2, 3, 4, 5]
        self.teen_plus_blocked = [1, 3, 4, 5]
        self.label_names = [label.name for label in ContentLabel]

    def predict(self, text):
        """Predict the content label for ``text``.

        Args:
            text: Text to classify.

        Returns:
            Tuple ``(label, confidence)``: ``label`` is the ContentLabel for
            the pipeline's prediction and ``confidence`` is the largest value
            in the pipeline's ``predict_proba`` output for the text.
        """
        prediction = self.pipeline.predict([text])[0]
        probs = self.pipeline.predict_proba([text])[0]
        confidence = max(probs)
        return ContentLabel(prediction), confidence

    def check(self, text, age):
        """Check content against the filter that applies to ``age``.

        Args:
            text: Text to check.
            age: User age; 13 and over selects teen-plus mode, anything
                lower selects under-13 mode.

        Returns:
            dict with 'allowed', 'label', 'confidence', 'mode', 'reason'.
            When the confidence is below ``MIN_CONFIDENCE`` the content is
            allowed and 'label' is "UNCERTAIN".
        """
        label, confidence = self.predict(text)
        teen_plus = age >= 13
        mode = AgeMode.TEEN_PLUS if teen_plus else AgeMode.UNDER_13

        # Low confidence check
        if confidence < self.MIN_CONFIDENCE:
            return {
                "allowed": True,
                "label": "UNCERTAIN",
                "confidence": confidence,
                "mode": mode.value,
                "reason": "Low confidence - manual review recommended",
            }

        # Check if blocked for this age
        blocked = self.teen_plus_blocked if teen_plus else self.under_13_blocked
        allowed = label.value not in blocked

        if label == ContentLabel.SWEARING_REACTION and teen_plus:
            # Reaction swearing is always permitted for 13+, even if the
            # blocked list was customised. Handling it before the generic
            # branches makes this reason reachable: it used to sit behind
            # ``if not allowed`` and was never reported with the default
            # lists.
            allowed = True
            reason = "Swearing permitted as reaction (13+)"
        elif allowed:
            reason = "Safe"
        else:
            reason = f"{label.name} detected"

        return {
            "allowed": allowed,
            "label": label.name,
            "confidence": confidence,
            "mode": mode.value,
            "reason": reason,
        }
# Example usage: run a handful of sample texts through the filter and print
# the verdict for each (text, age) pair.
if __name__ == "__main__":
    print("Testing Dual-Mode Content Filter")
    print("=" * 50)

    # Initialize (downloads model from HF)
    moderator = DualModeFilter()

    # The same text is tried at ages 10 and 15 to show the two modes.
    samples = (
        ("that was a great game", 10),
        ("that was a great game", 15),
        ("shit that sucks", 10),
        ("shit that sucks", 15),
        ("you're a piece of shit", 15),
        ("kill yourself", 15),
    )

    for sample_text, sample_age in samples:
        verdict = moderator.check(sample_text, sample_age)
        if verdict["allowed"]:
            banner = "✅ ALLOWED"
        else:
            banner = "❌ BLOCKED"
        print(f"\nAge {sample_age}: '{sample_text}'")
        print(f" {banner} - {verdict['reason']}")
        print(f" Confidence: {verdict['confidence']:.2f}")
|