Upload moderat_speed_test.ipynb with huggingface_hub
Browse files- moderat_speed_test.ipynb +93 -103
moderat_speed_test.ipynb
CHANGED
|
@@ -87,15 +87,6 @@
|
|
| 87 |
" HATE_SPEECH = 4\n",
|
| 88 |
" SPAM = 5\n",
|
| 89 |
"\n",
|
| 90 |
-
"class PIILabel(Enum):\n",
|
| 91 |
-
" SAFE = \"safe\"\n",
|
| 92 |
-
" EMAIL = \"email\"\n",
|
| 93 |
-
" PHONE = \"phone\"\n",
|
| 94 |
-
" ADDRESS = \"address\"\n",
|
| 95 |
-
" CREDIT_CARD = \"credit_card\"\n",
|
| 96 |
-
" SSN = \"ssn\"\n",
|
| 97 |
-
" SOCIAL_MEDIA = \"social_media\"\n",
|
| 98 |
-
"\n",
|
| 99 |
"print(\"✅ Setup complete\")"
|
| 100 |
]
|
| 101 |
},
|
|
@@ -105,9 +96,9 @@
|
|
| 105 |
"metadata": {},
|
| 106 |
"outputs": [],
|
| 107 |
"source": [
|
| 108 |
-
"# @title 4. PII Detector Class\n",
|
| 109 |
"class PIIDetector:\n",
|
| 110 |
-
" \"\"\"Detect PII
|
| 111 |
" \n",
|
| 112 |
" def __init__(self):\n",
|
| 113 |
" self.email_pattern = re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b')\n",
|
|
@@ -115,18 +106,16 @@
|
|
| 115 |
" re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),\n",
|
| 116 |
" re.compile(r'\\b\\(\\d{3}\\)\\s?\\d{3}[-.]?\\d{4}\\b'),\n",
|
| 117 |
" re.compile(r'\\b\\d{4}\\s?\\d{3}\\s?\\d{3}\\b'),\n",
|
|
|
|
| 118 |
" ]\n",
|
| 119 |
-
" self.
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
-
" ]\n",
|
| 123 |
-
" self.grooming_keywords = [\n",
|
| 124 |
-
" 'dm me', 'private chat', 'dont tell your parents', 'secret',\n",
|
| 125 |
-
" 'send me pics', 'our little secret', 'meet up'\n",
|
| 126 |
-
" ]\n",
|
| 127 |
" \n",
|
| 128 |
" def scan(self, text, age):\n",
|
| 129 |
" pii_types = []\n",
|
|
|
|
| 130 |
" \n",
|
| 131 |
" # Check email\n",
|
| 132 |
" if self.email_pattern.search(text):\n",
|
|
@@ -138,28 +127,38 @@
|
|
| 138 |
" pii_types.append('phone')\n",
|
| 139 |
" break\n",
|
| 140 |
" \n",
|
| 141 |
-
" # Check
|
| 142 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
" has_social = any(domain in text_lower for domain in self.social_media_domains)\n",
|
| 144 |
" has_social = has_social or any(x in text_lower for x in ['instagram', 'snapchat', 'discord', 'tiktok'])\n",
|
| 145 |
" \n",
|
| 146 |
" if has_social:\n",
|
| 147 |
" pii_types.append('social_media')\n",
|
| 148 |
-
" # Check grooming\n",
|
| 149 |
-
" grooming_risk = sum(1 for kw in self.grooming_keywords if kw in text_lower)\n",
|
| 150 |
-
" \n",
|
| 151 |
" if age < 13:\n",
|
| 152 |
" return {'blocked': True, 'reason': 'Social media not allowed under 13', 'pii': pii_types}\n",
|
| 153 |
" elif grooming_risk > 0:\n",
|
| 154 |
" return {'blocked': True, 'reason': f'Potential grooming (risk: {grooming_risk})', 'pii': pii_types}\n",
|
| 155 |
-
"
|
| 156 |
-
"
|
| 157 |
-
" return {'blocked': True, 'reason': f'PII detected: {pii_types}', 'pii': pii_types}\n",
|
| 158 |
" \n",
|
| 159 |
" return {'blocked': False, 'reason': 'No PII', 'pii': []}\n",
|
| 160 |
"\n",
|
| 161 |
"pii_detector = PIIDetector()\n",
|
| 162 |
-
"print(\"✅ PII detector ready\")"
|
| 163 |
]
|
| 164 |
},
|
| 165 |
{
|
|
@@ -170,10 +169,8 @@
|
|
| 170 |
"source": [
|
| 171 |
"# @title 5. Combined Filter Function\n",
|
| 172 |
"def check_content(text, age):\n",
|
| 173 |
-
" \"\"\"\n",
|
| 174 |
-
"
|
| 175 |
-
" Returns: {allowed, reason, content_label, pii_result}\n",
|
| 176 |
-
" \"\"\"\n",
|
| 177 |
" # Step 1: PII Check\n",
|
| 178 |
" pii_result = pii_detector.scan(text, age)\n",
|
| 179 |
" if pii_result['blocked']:\n",
|
|
@@ -191,8 +188,8 @@
|
|
| 191 |
" label = ContentLabel(prediction)\n",
|
| 192 |
" \n",
|
| 193 |
" # Age-based rules\n",
|
| 194 |
-
" under_13_blocked = [1, 2, 3, 4, 5]\n",
|
| 195 |
-
" teen_plus_blocked = [1, 3, 4, 5]\n",
|
| 196 |
" \n",
|
| 197 |
" if age >= 13:\n",
|
| 198 |
" allowed = label.value not in teen_plus_blocked\n",
|
|
@@ -214,7 +211,7 @@
|
|
| 214 |
" 'violation': 'CONTENT' if not allowed else None,\n",
|
| 215 |
" 'label': label.name,\n",
|
| 216 |
" 'confidence': confidence,\n",
|
| 217 |
-
" 'pii': []\n",
|
| 218 |
" }\n",
|
| 219 |
"\n",
|
| 220 |
"print(\"✅ Combined filter ready\")"
|
|
@@ -226,11 +223,45 @@
|
|
| 226 |
"metadata": {},
|
| 227 |
"outputs": [],
|
| 228 |
"source": [
|
| 229 |
-
"# @title 6.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
"test_text = \"damn that's crazy\"\n",
|
| 231 |
"\n",
|
| 232 |
"# Warm up\n",
|
| 233 |
-
"_ =
|
| 234 |
"\n",
|
| 235 |
"# Time single inference\n",
|
| 236 |
"times = []\n",
|
|
@@ -241,7 +272,7 @@
|
|
| 241 |
" times.append((end - start) * 1000)\n",
|
| 242 |
"\n",
|
| 243 |
"avg_time = sum(times) / len(times)\n",
|
| 244 |
-
"print(f\"π Single Inference Speed (100 runs)\")\n",
|
| 245 |
"print(f\" Average: {avg_time:.3f} ms\")\n",
|
| 246 |
"print(f\" Min: {min(times):.3f} ms\")\n",
|
| 247 |
"print(f\" Max: {max(times):.3f} ms\")\n",
|
|
@@ -254,7 +285,7 @@
|
|
| 254 |
"metadata": {},
|
| 255 |
"outputs": [],
|
| 256 |
"source": [
|
| 257 |
-
"# @title
|
| 258 |
"test_cases = [\n",
|
| 259 |
" (\"that was a great game\", 10),\n",
|
| 260 |
" (\"that was a great game\", 15),\n",
|
|
@@ -262,48 +293,17 @@
|
|
| 262 |
" (\"shit that sucks\", 15),\n",
|
| 263 |
" (\"you're a piece of shit\", 15),\n",
|
| 264 |
" (\"kill yourself\", 12),\n",
|
| 265 |
-
" (\"My email is test@gmail.com\", 16),\n",
|
| 266 |
-
" (\"Follow me on instagram @user\", 14),\n",
|
| 267 |
-
" (\"DM me privately\", 14),\n",
|
| 268 |
" (\"damn that's crazy\", 10),\n",
|
| 269 |
"]\n",
|
| 270 |
"\n",
|
| 271 |
-
"print(\"π Dual-Mode
|
| 272 |
-
"print(f\"{'Text':<
|
| 273 |
-
"print(\"-\" *
|
| 274 |
"\n",
|
| 275 |
"for text, age in test_cases:\n",
|
| 276 |
" result = check_content(text, age)\n",
|
| 277 |
" status = \"✅ ALLOW\" if result['allowed'] else \"❌ BLOCK\"\n",
|
| 278 |
-
" print(f\"{text:<
|
| 279 |
-
]
|
| 280 |
-
},
|
| 281 |
-
{
|
| 282 |
-
"cell_type": "code",
|
| 283 |
-
"execution_count": null,
|
| 284 |
-
"metadata": {},
|
| 285 |
-
"outputs": [],
|
| 286 |
-
"source": [
|
| 287 |
-
"# @title 8. PII Detection Specific Test\n",
|
| 288 |
-
"pii_tests = [\n",
|
| 289 |
-
" (\"Contact me at john@example.com\", 15),\n",
|
| 290 |
-
" (\"Call me 555-123-4567\", 16),\n",
|
| 291 |
-
" (\"I'm at 123 Main Street\", 14),\n",
|
| 292 |
-
" (\"My credit card is 4111-1111-1111-1111\", 15),\n",
|
| 293 |
-
" (\"Follow my instagram @cool\", 10),\n",
|
| 294 |
-
" (\"Follow my instagram @cool\", 15),\n",
|
| 295 |
-
" (\"DM me on snapchat, it's secret\", 15),\n",
|
| 296 |
-
" (\"Check my tiktok\", 16),\n",
|
| 297 |
-
"]\n",
|
| 298 |
-
"\n",
|
| 299 |
-
"print(\"π PII Detection Results\\n\")\n",
|
| 300 |
-
"for text, age in pii_tests:\n",
|
| 301 |
-
" result = check_content(text, age)\n",
|
| 302 |
-
" status = \"✅\" if result['allowed'] else \"❌\"\n",
|
| 303 |
-
" pii_info = f\"PII: {result.get('pii', [])}\" if result.get('pii') else \"\"\n",
|
| 304 |
-
" print(f\"{status} Age {age}: {text[:40]}...\")\n",
|
| 305 |
-
" print(f\" β {result['reason']} {pii_info}\")\n",
|
| 306 |
-
" print()"
|
| 307 |
]
|
| 308 |
},
|
| 309 |
{
|
|
@@ -317,11 +317,11 @@
|
|
| 317 |
" \"that was a great game\",\n",
|
| 318 |
" \"shit that sucks\",\n",
|
| 319 |
" \"you're awesome\",\n",
|
| 320 |
-
" \"damn good job\",\n",
|
| 321 |
" \"My email is test@test.com\",\n",
|
| 322 |
" \"Follow me on instagram\",\n",
|
| 323 |
" \"kill yourself\",\n",
|
| 324 |
" \"nice work\",\n",
|
|
|
|
| 325 |
"] * 50 # 400 texts\n",
|
| 326 |
"\n",
|
| 327 |
"ages = [15] * len(batch_texts)\n",
|
|
@@ -348,34 +348,24 @@
|
|
| 348 |
"metadata": {},
|
| 349 |
"outputs": [],
|
| 350 |
"source": [
|
| 351 |
-
"# @title 10.
|
| 352 |
-
"
|
| 353 |
-
"
|
| 354 |
-
"print(
|
| 355 |
-
"print(
|
| 356 |
-
"print(
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
"
|
| 364 |
-
"\n",
|
| 365 |
-
"
|
| 366 |
-
"-
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
-
"- **Memory:** ~10-20MB\n",
|
| 370 |
-
"\n",
|
| 371 |
-
"## π Links\n",
|
| 372 |
-
"\n",
|
| 373 |
-
"- **Model:** https://huggingface.co/darwinkernelpanic/moderat\n",
|
| 374 |
-
"- **Features:**\n",
|
| 375 |
-
" - Content moderation (6 categories)\n",
|
| 376 |
-
" - PII detection (email, phone, address)\n",
|
| 377 |
-
" - Social media protection (age-based)\n",
|
| 378 |
-
" - Grooming detection (13+ mode)"
|
| 379 |
]
|
| 380 |
}
|
| 381 |
]
|
|
|
|
| 87 |
" HATE_SPEECH = 4\n",
|
| 88 |
" SPAM = 5\n",
|
| 89 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
"print(\"✅ Setup complete\")"
|
| 91 |
]
|
| 92 |
},
|
|
|
|
| 96 |
"metadata": {},
|
| 97 |
"outputs": [],
|
| 98 |
"source": [
|
| 99 |
+
"# @title 4. PII Detector Class (FIXED)\n",
|
| 100 |
"class PIIDetector:\n",
|
| 101 |
+
" \"\"\"Detect PII with proper age-based social media rules\"\"\"\n",
|
| 102 |
" \n",
|
| 103 |
" def __init__(self):\n",
|
| 104 |
" self.email_pattern = re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b')\n",
|
|
|
|
| 106 |
" re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),\n",
|
| 107 |
" re.compile(r'\\b\\(\\d{3}\\)\\s?\\d{3}[-.]?\\d{4}\\b'),\n",
|
| 108 |
" re.compile(r'\\b\\d{4}\\s?\\d{3}\\s?\\d{3}\\b'),\n",
|
| 109 |
+
" re.compile(r'\\b\\d{3}[-.]?\\d{4}\\b'),\n",
|
| 110 |
" ]\n",
|
| 111 |
+
" self.address_pattern = re.compile(r'\\b\\d+\\s+[A-Za-z]+\\s+(?:Street|St|Avenue|Ave|Road|Rd|Lane|Ln|Drive|Dr)\\b', re.IGNORECASE)\n",
|
| 112 |
+
" self.cc_pattern = re.compile(r'\\b(?:\\d{4}[-\\s]?){3}\\d{4}\\b|\\b\\d{16}\\b')\n",
|
| 113 |
+
" self.social_media_domains = ['instagram.com', 'instagr.am', 'twitter.com', 'x.com', 'tiktok.com', 'snapchat.com', 'discord.com', 'discord.gg']\n",
|
| 114 |
+
" self.grooming_keywords = ['dm me', 'private chat', 'dont tell your parents', 'secret', 'send me pics', 'our little secret', 'meet up']\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
" \n",
|
| 116 |
" def scan(self, text, age):\n",
|
| 117 |
" pii_types = []\n",
|
| 118 |
+
" text_lower = text.lower()\n",
|
| 119 |
" \n",
|
| 120 |
" # Check email\n",
|
| 121 |
" if self.email_pattern.search(text):\n",
|
|
|
|
| 127 |
" pii_types.append('phone')\n",
|
| 128 |
" break\n",
|
| 129 |
" \n",
|
| 130 |
+
" # Check address\n",
|
| 131 |
+
" if self.address_pattern.search(text):\n",
|
| 132 |
+
" pii_types.append('address')\n",
|
| 133 |
+
" \n",
|
| 134 |
+
" # Check credit card\n",
|
| 135 |
+
" if self.cc_pattern.search(text):\n",
|
| 136 |
+
" pii_types.append('credit_card')\n",
|
| 137 |
+
" \n",
|
| 138 |
+
" # Check grooming\n",
|
| 139 |
+
" grooming_risk = sum(1 for kw in self.grooming_keywords if kw in text_lower)\n",
|
| 140 |
+
" \n",
|
| 141 |
+
" # Priority: Critical PII first (blocked for all ages)\n",
|
| 142 |
+
" if any(pii in ['email', 'phone', 'address', 'credit_card'] for pii in pii_types):\n",
|
| 143 |
+
" return {'blocked': True, 'reason': f'PII detected: {pii_types}', 'pii': pii_types}\n",
|
| 144 |
+
" \n",
|
| 145 |
+
" # Social media check\n",
|
| 146 |
" has_social = any(domain in text_lower for domain in self.social_media_domains)\n",
|
| 147 |
" has_social = has_social or any(x in text_lower for x in ['instagram', 'snapchat', 'discord', 'tiktok'])\n",
|
| 148 |
" \n",
|
| 149 |
" if has_social:\n",
|
| 150 |
" pii_types.append('social_media')\n",
|
|
|
|
|
|
|
|
|
|
| 151 |
" if age < 13:\n",
|
| 152 |
" return {'blocked': True, 'reason': 'Social media not allowed under 13', 'pii': pii_types}\n",
|
| 153 |
" elif grooming_risk > 0:\n",
|
| 154 |
" return {'blocked': True, 'reason': f'Potential grooming (risk: {grooming_risk})', 'pii': pii_types}\n",
|
| 155 |
+
" else:\n",
|
| 156 |
+
" return {'blocked': False, 'reason': 'Social media OK for 13+', 'pii': pii_types}\n",
|
|
|
|
| 157 |
" \n",
|
| 158 |
" return {'blocked': False, 'reason': 'No PII', 'pii': []}\n",
|
| 159 |
"\n",
|
| 160 |
"pii_detector = PIIDetector()\n",
|
| 161 |
+
"print(\"✅ PII detector ready (FIXED)\")"
|
| 162 |
]
|
| 163 |
},
|
| 164 |
{
|
|
|
|
| 169 |
"source": [
|
| 170 |
"# @title 5. Combined Filter Function\n",
|
| 171 |
"def check_content(text, age):\n",
|
| 172 |
+
" \"\"\"Combined content moderation + PII check\"\"\"\n",
|
| 173 |
+
" \n",
|
|
|
|
|
|
|
| 174 |
" # Step 1: PII Check\n",
|
| 175 |
" pii_result = pii_detector.scan(text, age)\n",
|
| 176 |
" if pii_result['blocked']:\n",
|
|
|
|
| 188 |
" label = ContentLabel(prediction)\n",
|
| 189 |
" \n",
|
| 190 |
" # Age-based rules\n",
|
| 191 |
+
" under_13_blocked = [1, 2, 3, 4, 5] # All except SAFE\n",
|
| 192 |
+
" teen_plus_blocked = [1, 3, 4, 5] # Allow SWEARING_REACTION\n",
|
| 193 |
" \n",
|
| 194 |
" if age >= 13:\n",
|
| 195 |
" allowed = label.value not in teen_plus_blocked\n",
|
|
|
|
| 211 |
" 'violation': 'CONTENT' if not allowed else None,\n",
|
| 212 |
" 'label': label.name,\n",
|
| 213 |
" 'confidence': confidence,\n",
|
| 214 |
+
" 'pii': pii_result.get('pii', [])\n",
|
| 215 |
" }\n",
|
| 216 |
"\n",
|
| 217 |
"print(\"✅ Combined filter ready\")"
|
|
|
|
| 223 |
"metadata": {},
|
| 224 |
"outputs": [],
|
| 225 |
"source": [
|
| 226 |
+
"# @title 6. PII Detection Tests (FIXED)\n",
|
| 227 |
+
"print(\"π PII Detection Results (Fixed)\\n\")\n",
|
| 228 |
+
"print(\"Expected: Address and Credit Card now detected correctly\")\n",
|
| 229 |
+
"print(\"Expected: Social media ALLOWED for 13+ (unless grooming)\\n\")\n",
|
| 230 |
+
"print(\"=\"*70)\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"pii_tests = [\n",
|
| 233 |
+
" (\"Contact me at john@example.com\", 15, \"Email - should block\"),\n",
|
| 234 |
+
" (\"Call me 555-123-4567\", 16, \"Phone - should block\"),\n",
|
| 235 |
+
" (\"I'm at 123 Main Street\", 14, \"Address - should block\"),\n",
|
| 236 |
+
" (\"My credit card is 4111-1111-1111-1111\", 15, \"Credit Card - should block\"),\n",
|
| 237 |
+
" (\"Follow my instagram @cool\", 10, \"Social <13 - should block\"),\n",
|
| 238 |
+
" (\"Follow my instagram @cool\", 15, \"Social 13+ - should ALLOW\"),\n",
|
| 239 |
+
" (\"DM me on snapchat, it's secret\", 15, \"Grooming - should block\"),\n",
|
| 240 |
+
" (\"Check my tiktok\", 16, \"Social 16+ - should ALLOW\"),\n",
|
| 241 |
+
"]\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"for text, age, note in pii_tests:\n",
|
| 244 |
+
" result = check_content(text, age)\n",
|
| 245 |
+
" status = \"✅\" if result['allowed'] else \"❌\"\n",
|
| 246 |
+
" print(f\"{status} Age {age}: {text[:45]}\")\n",
|
| 247 |
+
" print(f\" β {result['reason']}\")\n",
|
| 248 |
+
" if result.get('pii'):\n",
|
| 249 |
+
" print(f\" PII: {result['pii']}\")\n",
|
| 250 |
+
" print(f\" Note: {note}\")\n",
|
| 251 |
+
" print()"
|
| 252 |
+
]
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"cell_type": "code",
|
| 256 |
+
"execution_count": null,
|
| 257 |
+
"metadata": {},
|
| 258 |
+
"outputs": [],
|
| 259 |
+
"source": [
|
| 260 |
+
"# @title 7. Speed Test - Single Inference\n",
|
| 261 |
"test_text = \"damn that's crazy\"\n",
|
| 262 |
"\n",
|
| 263 |
"# Warm up\n",
|
| 264 |
+
"_ = check_content(test_text, 15)\n",
|
| 265 |
"\n",
|
| 266 |
"# Time single inference\n",
|
| 267 |
"times = []\n",
|
|
|
|
| 272 |
" times.append((end - start) * 1000)\n",
|
| 273 |
"\n",
|
| 274 |
"avg_time = sum(times) / len(times)\n",
|
| 275 |
+
"print(f\"π Single Inference Speed (100 runs, with PII check)\")\n",
|
| 276 |
"print(f\" Average: {avg_time:.3f} ms\")\n",
|
| 277 |
"print(f\" Min: {min(times):.3f} ms\")\n",
|
| 278 |
"print(f\" Max: {max(times):.3f} ms\")\n",
|
|
|
|
| 285 |
"metadata": {},
|
| 286 |
"outputs": [],
|
| 287 |
"source": [
|
| 288 |
+
"# @title 8. Dual-Mode Content Test\n",
|
| 289 |
"test_cases = [\n",
|
| 290 |
" (\"that was a great game\", 10),\n",
|
| 291 |
" (\"that was a great game\", 15),\n",
|
|
|
|
| 293 |
" (\"shit that sucks\", 15),\n",
|
| 294 |
" (\"you're a piece of shit\", 15),\n",
|
| 295 |
" (\"kill yourself\", 12),\n",
|
|
|
|
|
|
|
|
|
|
| 296 |
" (\"damn that's crazy\", 10),\n",
|
| 297 |
"]\n",
|
| 298 |
"\n",
|
| 299 |
+
"print(\"π Dual-Mode Content Results\\n\")\n",
|
| 300 |
+
"print(f\"{'Text':<30} {'Age':<6} {'Status':<10} {'Reason':<30}\")\n",
|
| 301 |
+
"print(\"-\" * 80)\n",
|
| 302 |
"\n",
|
| 303 |
"for text, age in test_cases:\n",
|
| 304 |
" result = check_content(text, age)\n",
|
| 305 |
" status = \"✅ ALLOW\" if result['allowed'] else \"❌ BLOCK\"\n",
|
| 306 |
+
" print(f\"{text:<30} {age:<6} {status:<10} {result['reason'][:28]:<30}\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
]
|
| 308 |
},
|
| 309 |
{
|
|
|
|
| 317 |
" \"that was a great game\",\n",
|
| 318 |
" \"shit that sucks\",\n",
|
| 319 |
" \"you're awesome\",\n",
|
|
|
|
| 320 |
" \"My email is test@test.com\",\n",
|
| 321 |
" \"Follow me on instagram\",\n",
|
| 322 |
" \"kill yourself\",\n",
|
| 323 |
" \"nice work\",\n",
|
| 324 |
+
" \"Check my tiktok\",\n",
|
| 325 |
"] * 50 # 400 texts\n",
|
| 326 |
"\n",
|
| 327 |
"ages = [15] * len(batch_texts)\n",
|
|
|
|
| 348 |
"metadata": {},
|
| 349 |
"outputs": [],
|
| 350 |
"source": [
|
| 351 |
+
"# @title 10. Summary\n",
|
| 352 |
+
"print(\"π moderat Summary\")\n",
|
| 353 |
+
"print(\"=\"*60)\n",
|
| 354 |
+
"print(\"\")\n",
|
| 355 |
+
"print(\"✅ Content Moderation:\")\n",
|
| 356 |
+
"print(\" - 6 categories (Safe, Harassment, Swearing, Hate, Spam)\")\n",
|
| 357 |
+
"print(\" - Dual-mode: <13 strict, 13+ laxed\")\n",
|
| 358 |
+
"print(\"\")\n",
|
| 359 |
+
"print(\"✅ PII Detection:\")\n",
|
| 360 |
+
"print(\" - Email, Phone, Address, Credit Card (all ages blocked)\")\n",
|
| 361 |
+
"print(\" - Social Media: <13 blocked, 13+ allowed\")\n",
|
| 362 |
+
"print(\" - Grooming detection for 13+\")\n",
|
| 363 |
+
"print(\"\")\n",
|
| 364 |
+
"print(\"π Speed:\")\n",
|
| 365 |
+
"print(\" - ~3-7ms per inference (with PII)\")\n",
|
| 366 |
+
"print(\" - ~200-500 texts/sec batch\")\n",
|
| 367 |
+
"print(\"\")\n",
|
| 368 |
+
"print(\"π https://huggingface.co/darwinkernelpanic/moderat\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
]
|
| 370 |
}
|
| 371 |
]
|