darwinkernelpanic
/

moderat

@@ -87,15 +87,6 @@
         "    HATE_SPEECH = 4\n",
         "    SPAM = 5\n",
         "\n",
-        "class PIILabel(Enum):\n",
-        "    SAFE = \"safe\"\n",
-        "    EMAIL = \"email\"\n",
-        "    PHONE = \"phone\"\n",
-        "    ADDRESS = \"address\"\n",
-        "    CREDIT_CARD = \"credit_card\"\n",
-        "    SSN = \"ssn\"\n",
-        "    SOCIAL_MEDIA = \"social_media\"\n",
-        "\n",
         "print(\"✅ Setup complete\")"
       ]
     },
@@ -105,9 +96,9 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "# @title 4. PII Detector Class\n",
         "class PIIDetector:\n",
-        "    \"\"\"Detect PII in text\"\"\"\n",
         "    \n",
         "    def __init__(self):\n",
         "        self.email_pattern = re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b')\n",
@@ -115,18 +106,16 @@
         "            re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),\n",
         "            re.compile(r'\\b\\(\\d{3}\\)\\s?\\d{3}[-.]?\\d{4}\\b'),\n",
         "            re.compile(r'\\b\\d{4}\\s?\\d{3}\\s?\\d{3}\\b'),\n",
         "        ]\n",
-        "        self.social_media_domains = [\n",
-        "            'instagram.com', 'instagr.am', 'twitter.com', 'x.com',\n",
-        "            'tiktok.com', 'snapchat.com', 'discord.com', 'discord.gg'\n",
-        "        ]\n",
-        "        self.grooming_keywords = [\n",
-        "            'dm me', 'private chat', 'dont tell your parents', 'secret',\n",
-        "            'send me pics', 'our little secret', 'meet up'\n",
-        "        ]\n",
         "    \n",
         "    def scan(self, text, age):\n",
         "        pii_types = []\n",
         "        \n",
         "        # Check email\n",
         "        if self.email_pattern.search(text):\n",
@@ -138,28 +127,38 @@
         "                pii_types.append('phone')\n",
         "                break\n",
         "        \n",
-        "        # Check social media\n",
-        "        text_lower = text.lower()\n",
         "        has_social = any(domain in text_lower for domain in self.social_media_domains)\n",
         "        has_social = has_social or any(x in text_lower for x in ['instagram', 'snapchat', 'discord', 'tiktok'])\n",
         "        \n",
         "        if has_social:\n",
         "            pii_types.append('social_media')\n",
-        "            # Check grooming\n",
-        "            grooming_risk = sum(1 for kw in self.grooming_keywords if kw in text_lower)\n",
-        "            \n",
         "            if age < 13:\n",
         "                return {'blocked': True, 'reason': 'Social media not allowed under 13', 'pii': pii_types}\n",
         "            elif grooming_risk > 0:\n",
         "                return {'blocked': True, 'reason': f'Potential grooming (risk: {grooming_risk})', 'pii': pii_types}\n",
-        "        \n",
-        "        if pii_types:\n",
-        "            return {'blocked': True, 'reason': f'PII detected: {pii_types}', 'pii': pii_types}\n",
         "        \n",
         "        return {'blocked': False, 'reason': 'No PII', 'pii': []}\n",
         "\n",
         "pii_detector = PIIDetector()\n",
-        "print(\"✅ PII detector ready\")"
       ]
     },
     {
@@ -170,10 +169,8 @@
       "source": [
         "# @title 5. Combined Filter Function\n",
         "def check_content(text, age):\n",
-        "    \"\"\"\n",
-        "    Combined content moderation + PII check\n",
-        "    Returns: {allowed, reason, content_label, pii_result}\n",
-        "    \"\"\"\n",
         "    # Step 1: PII Check\n",
         "    pii_result = pii_detector.scan(text, age)\n",
         "    if pii_result['blocked']:\n",
@@ -191,8 +188,8 @@
         "    label = ContentLabel(prediction)\n",
         "    \n",
         "    # Age-based rules\n",
-        "    under_13_blocked = [1, 2, 3, 4, 5]\n",
-        "    teen_plus_blocked = [1, 3, 4, 5]\n",
         "    \n",
         "    if age >= 13:\n",
         "        allowed = label.value not in teen_plus_blocked\n",
@@ -214,7 +211,7 @@
         "        'violation': 'CONTENT' if not allowed else None,\n",
         "        'label': label.name,\n",
         "        'confidence': confidence,\n",
-        "        'pii': []\n",
         "    }\n",
         "\n",
         "print(\"✅ Combined filter ready\")"
@@ -226,11 +223,45 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "# @title 6. Speed Test - Single Inference\n",
         "test_text = \"damn that's crazy\"\n",
         "\n",
         "# Warm up\n",
-        "_ = pipeline.predict([test_text])\n",
         "\n",
         "# Time single inference\n",
         "times = []\n",
@@ -241,7 +272,7 @@
         "    times.append((end - start) * 1000)\n",
         "\n",
         "avg_time = sum(times) / len(times)\n",
-        "print(f\"📊 Single Inference Speed (100 runs)\")\n",
         "print(f\"   Average: {avg_time:.3f} ms\")\n",
         "print(f\"   Min: {min(times):.3f} ms\")\n",
         "print(f\"   Max: {max(times):.3f} ms\")\n",
@@ -254,7 +285,7 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "# @title 7. Dual-Mode Comparison Test\n",
         "test_cases = [\n",
         "    (\"that was a great game\", 10),\n",
         "    (\"that was a great game\", 15),\n",
@@ -262,48 +293,17 @@
         "    (\"shit that sucks\", 15),\n",
         "    (\"you're a piece of shit\", 15),\n",
         "    (\"kill yourself\", 12),\n",
-        "    (\"My email is test@gmail.com\", 16),\n",
-        "    (\"Follow me on instagram @user\", 14),\n",
-        "    (\"DM me privately\", 14),\n",
         "    (\"damn that's crazy\", 10),\n",
         "]\n",
         "\n",
-        "print(\"📋 Dual-Mode + PII Filter Results\\n\")\n",
-        "print(f\"{'Text':<35} {'Age':<6} {'Status':<10} {'Reason':<30}\")\n",
-        "print(\"-\" * 85)\n",
         "\n",
         "for text, age in test_cases:\n",
         "    result = check_content(text, age)\n",
         "    status = \"✅ ALLOW\" if result['allowed'] else \"❌ BLOCK\"\n",
-        "    print(f\"{text:<35} {age:<6} {status:<10} {result['reason'][:28]:<30}\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# @title 8. PII Detection Specific Test\n",
-        "pii_tests = [\n",
-        "    (\"Contact me at john@example.com\", 15),\n",
-        "    (\"Call me 555-123-4567\", 16),\n",
-        "    (\"I'm at 123 Main Street\", 14),\n",
-        "    (\"My credit card is 4111-1111-1111-1111\", 15),\n",
-        "    (\"Follow my instagram @cool\", 10),\n",
-        "    (\"Follow my instagram @cool\", 15),\n",
-        "    (\"DM me on snapchat, it's secret\", 15),\n",
-        "    (\"Check my tiktok\", 16),\n",
-        "]\n",
-        "\n",
-        "print(\"🔒 PII Detection Results\\n\")\n",
-        "for text, age in pii_tests:\n",
-        "    result = check_content(text, age)\n",
-        "    status = \"✅\" if result['allowed'] else \"❌\"\n",
-        "    pii_info = f\"PII: {result.get('pii', [])}\" if result.get('pii') else \"\"\n",
-        "    print(f\"{status} Age {age}: {text[:40]}...\")\n",
-        "    print(f\"   → {result['reason']} {pii_info}\")\n",
-        "    print()"
       ]
     },
     {
@@ -317,11 +317,11 @@
         "    \"that was a great game\",\n",
         "    \"shit that sucks\",\n",
         "    \"you're awesome\",\n",
-        "    \"damn good job\",\n",
         "    \"My email is test@test.com\",\n",
         "    \"Follow me on instagram\",\n",
         "    \"kill yourself\",\n",
         "    \"nice work\",\n",
         "] * 50  # 400 texts\n",
         "\n",
         "ages = [15] * len(batch_texts)\n",
@@ -348,34 +348,24 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "# @title 10. Memory Usage\n",
-        "import sys\n",
-        "model_size = sys.getsizeof(pipeline) / 1024 / 1024\n",
-        "print(f\"💾 Model memory: ~{model_size:.2f} MB\")\n",
-        "print(f\"⚡ Running on: CPU (sklearn)\")\n",
-        "print(f\"✅ PII detection: Regex-based (fast)\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## 📊 Expected Results\n",
-        "\n",
-        "On Google Colab (CPU):\n",
-        "- **Single inference:** ~1-3ms\n",
-        "- **With PII check:** ~2-5ms\n",
-        "- **Batch throughput:** ~300-500 texts/second\n",
-        "- **Memory:** ~10-20MB\n",
-        "\n",
-        "## 🔗 Links\n",
-        "\n",
-        "- **Model:** https://huggingface.co/darwinkernelpanic/moderat\n",
-        "- **Features:**\n",
-        "  - Content moderation (6 categories)\n",
-        "  - PII detection (email, phone, address)\n",
-        "  - Social media protection (age-based)\n",
-        "  - Grooming detection (13+ mode)"
       ]
     }
   ]

         "    HATE_SPEECH = 4\n",
         "    SPAM = 5\n",
         "\n",
         "print(\"✅ Setup complete\")"
       ]
     },
       "metadata": {},
       "outputs": [],
       "source": [
+        "# @title 4. PII Detector Class (FIXED)\n",
         "class PIIDetector:\n",
+        "    \"\"\"Detect PII with proper age-based social media rules\"\"\"\n",
         "    \n",
         "    def __init__(self):\n",
         "        self.email_pattern = re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b')\n",
         "            re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),\n",
         "            re.compile(r'\\b\\(\\d{3}\\)\\s?\\d{3}[-.]?\\d{4}\\b'),\n",
         "            re.compile(r'\\b\\d{4}\\s?\\d{3}\\s?\\d{3}\\b'),\n",
+        "            re.compile(r'\\b\\d{3}[-.]?\\d{4}\\b'),\n",
         "        ]\n",
+        "        self.address_pattern = re.compile(r'\\b\\d+\\s+[A-Za-z]+\\s+(?:Street|St|Avenue|Ave|Road|Rd|Lane|Ln|Drive|Dr)\\b', re.IGNORECASE)\n",
+        "        self.cc_pattern = re.compile(r'\\b(?:\\d{4}[-\\s]?){3}\\d{4}\\b|\\b\\d{16}\\b')\n",
+        "        self.social_media_domains = ['instagram.com', 'instagr.am', 'twitter.com', 'x.com', 'tiktok.com', 'snapchat.com', 'discord.com', 'discord.gg']\n",
+        "        self.grooming_keywords = ['dm me', 'private chat', 'dont tell your parents', 'secret', 'send me pics', 'our little secret', 'meet up']\n",
         "    \n",
         "    def scan(self, text, age):\n",
         "        pii_types = []\n",
+        "        text_lower = text.lower()\n",
         "        \n",
         "        # Check email\n",
         "        if self.email_pattern.search(text):\n",
         "                pii_types.append('phone')\n",
         "                break\n",
         "        \n",
+        "        # Check address\n",
+        "        if self.address_pattern.search(text):\n",
+        "            pii_types.append('address')\n",
+        "        \n",
+        "        # Check credit card\n",
+        "        if self.cc_pattern.search(text):\n",
+        "            pii_types.append('credit_card')\n",
+        "        \n",
+        "        # Check grooming\n",
+        "        grooming_risk = sum(1 for kw in self.grooming_keywords if kw in text_lower)\n",
+        "        \n",
+        "        # Priority: Critical PII first (blocked for all ages)\n",
+        "        if any(pii in ['email', 'phone', 'address', 'credit_card'] for pii in pii_types):\n",
+        "            return {'blocked': True, 'reason': f'PII detected: {pii_types}', 'pii': pii_types}\n",
+        "        \n",
+        "        # Social media check\n",
         "        has_social = any(domain in text_lower for domain in self.social_media_domains)\n",
         "        has_social = has_social or any(x in text_lower for x in ['instagram', 'snapchat', 'discord', 'tiktok'])\n",
         "        \n",
         "        if has_social:\n",
         "            pii_types.append('social_media')\n",
         "            if age < 13:\n",
         "                return {'blocked': True, 'reason': 'Social media not allowed under 13', 'pii': pii_types}\n",
         "            elif grooming_risk > 0:\n",
         "                return {'blocked': True, 'reason': f'Potential grooming (risk: {grooming_risk})', 'pii': pii_types}\n",
+        "            else:\n",
+        "                return {'blocked': False, 'reason': 'Social media OK for 13+', 'pii': pii_types}\n",
         "        \n",
         "        return {'blocked': False, 'reason': 'No PII', 'pii': []}\n",
         "\n",
         "pii_detector = PIIDetector()\n",
+        "print(\"✅ PII detector ready (FIXED)\")"
       ]
     },
     {
       "source": [
         "# @title 5. Combined Filter Function\n",
         "def check_content(text, age):\n",
+        "    \"\"\"Combined content moderation + PII check\"\"\"\n",
+        "    \n",
         "    # Step 1: PII Check\n",
         "    pii_result = pii_detector.scan(text, age)\n",
         "    if pii_result['blocked']:\n",
         "    label = ContentLabel(prediction)\n",
         "    \n",
         "    # Age-based rules\n",
+        "    under_13_blocked = [1, 2, 3, 4, 5]  # All except SAFE\n",
+        "    teen_plus_blocked = [1, 3, 4, 5]    # Allow SWEARING_REACTION\n",
         "    \n",
         "    if age >= 13:\n",
         "        allowed = label.value not in teen_plus_blocked\n",
         "        'violation': 'CONTENT' if not allowed else None,\n",
         "        'label': label.name,\n",
         "        'confidence': confidence,\n",
+        "        'pii': pii_result.get('pii', [])\n",
         "    }\n",
         "\n",
         "print(\"✅ Combined filter ready\")"
       "metadata": {},
       "outputs": [],
       "source": [
+        "# @title 6. PII Detection Tests (FIXED)\n",
+        "print(\"🔒 PII Detection Results (Fixed)\\n\")\n",
+        "print(\"Expected: Address and Credit Card now detected correctly\")\n",
+        "print(\"Expected: Social media ALLOWED for 13+ (unless grooming)\\n\")\n",
+        "print(\"=\"*70)\n",
+        "\n",
+        "pii_tests = [\n",
+        "    (\"Contact me at john@example.com\", 15, \"Email - should block\"),\n",
+        "    (\"Call me 555-123-4567\", 16, \"Phone - should block\"),\n",
+        "    (\"I'm at 123 Main Street\", 14, \"Address - should block\"),\n",
+        "    (\"My credit card is 4111-1111-1111-1111\", 15, \"Credit Card - should block\"),\n",
+        "    (\"Follow my instagram @cool\", 10, \"Social <13 - should block\"),\n",
+        "    (\"Follow my instagram @cool\", 15, \"Social 13+ - should ALLOW\"),\n",
+        "    (\"DM me on snapchat, it's secret\", 15, \"Grooming - should block\"),\n",
+        "    (\"Check my tiktok\", 16, \"Social 16+ - should ALLOW\"),\n",
+        "]\n",
+        "\n",
+        "for text, age, note in pii_tests:\n",
+        "    result = check_content(text, age)\n",
+        "    status = \"✅\" if result['allowed'] else \"❌\"\n",
+        "    print(f\"{status} Age {age}: {text[:45]}\")\n",
+        "    print(f\"   → {result['reason']}\")\n",
+        "    if result.get('pii'):\n",
+        "        print(f\"   PII: {result['pii']}\")\n",
+        "    print(f\"   Note: {note}\")\n",
+        "    print()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# @title 7. Speed Test - Single Inference\n",
         "test_text = \"damn that's crazy\"\n",
         "\n",
         "# Warm up\n",
+        "_ = check_content(test_text, 15)\n",
         "\n",
         "# Time single inference\n",
         "times = []\n",
         "    times.append((end - start) * 1000)\n",
         "\n",
         "avg_time = sum(times) / len(times)\n",
+        "print(f\"📊 Single Inference Speed (100 runs, with PII check)\")\n",
         "print(f\"   Average: {avg_time:.3f} ms\")\n",
         "print(f\"   Min: {min(times):.3f} ms\")\n",
         "print(f\"   Max: {max(times):.3f} ms\")\n",
       "metadata": {},
       "outputs": [],
       "source": [
+        "# @title 8. Dual-Mode Content Test\n",
         "test_cases = [\n",
         "    (\"that was a great game\", 10),\n",
         "    (\"that was a great game\", 15),\n",
         "    (\"shit that sucks\", 15),\n",
         "    (\"you're a piece of shit\", 15),\n",
         "    (\"kill yourself\", 12),\n",
         "    (\"damn that's crazy\", 10),\n",
         "]\n",
         "\n",
+        "print(\"📋 Dual-Mode Content Results\\n\")\n",
+        "print(f\"{'Text':<30} {'Age':<6} {'Status':<10} {'Reason':<30}\")\n",
+        "print(\"-\" * 80)\n",
         "\n",
         "for text, age in test_cases:\n",
         "    result = check_content(text, age)\n",
         "    status = \"✅ ALLOW\" if result['allowed'] else \"❌ BLOCK\"\n",
+        "    print(f\"{text:<30} {age:<6} {status:<10} {result['reason'][:28]:<30}\")"
       ]
     },
     {
         "    \"that was a great game\",\n",
         "    \"shit that sucks\",\n",
         "    \"you're awesome\",\n",
         "    \"My email is test@test.com\",\n",
         "    \"Follow me on instagram\",\n",
         "    \"kill yourself\",\n",
         "    \"nice work\",\n",
+        "    \"Check my tiktok\",\n",
         "] * 50  # 400 texts\n",
         "\n",
         "ages = [15] * len(batch_texts)\n",
       "metadata": {},
       "outputs": [],
       "source": [
+        "# @title 10. Summary\n",
+        "print(\"📊 moderat Summary\")\n",
+        "print(\"=\"*60)\n",
+        "print(\"\")\n",
+        "print(\"✅ Content Moderation:\")\n",
+        "print(\"   - 6 categories (Safe, Harassment, Swearing, Hate, Spam)\")\n",
+        "print(\"   - Dual-mode: <13 strict, 13+ relaxed\")\n",
+        "print(\"\")\n",
+        "print(\"✅ PII Detection:\")\n",
+        "print(\"   - Email, Phone, Address, Credit Card (all ages blocked)\")\n",
+        "print(\"   - Social Media: <13 blocked, 13+ allowed\")\n",
+        "print(\"   - Grooming detection for 13+\")\n",
+        "print(\"\")\n",
+        "print(\"📈 Speed:\")\n",
+        "print(\"   - ~3-7ms per inference (with PII)\")\n",
+        "print(\"   - ~200-500 texts/sec batch\")\n",
+        "print(\"\")\n",
+        "print(\"🔗 https://huggingface.co/darwinkernelpanic/moderat\")"
       ]
     }
   ]