darwinkernelpanic committed on
Commit
e23e46b
·
verified ·
1 Parent(s): 8a0597c

Upload moderat_speed_test.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. moderat_speed_test.ipynb +225 -82
moderat_speed_test.ipynb CHANGED
@@ -18,7 +18,7 @@
18
  "source": [
19
  "# πŸ›‘οΈ moderat - Speed Test & Benchmark\n",
20
  "\n",
21
- "Test inference speeds for the dual-mode content moderation model.\n",
22
  "\n",
23
  "**Model:** [darwinkernelpanic/moderat](https://huggingface.co/darwinkernelpanic/moderat)"
24
  ]
@@ -39,7 +39,7 @@
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
42
- "# @title 2. Download model from Hugging Face\n",
43
  "from huggingface_hub import hf_hub_download\n",
44
  "import pickle\n",
45
  "\n",
@@ -51,11 +51,13 @@
51
  " filename=\"moderation_model.pkl\"\n",
52
  ")\n",
53
  "\n",
54
- "# Load model\n",
55
- "with open(model_path, 'rb') as f:\n",
56
- " pipeline = pickle.load(f)\n",
 
 
57
  "\n",
58
- "print(f\"βœ… Model loaded from {MODEL_REPO}\")"
59
  ]
60
  },
61
  {
@@ -64,10 +66,19 @@
64
  "metadata": {},
65
  "outputs": [],
66
  "source": [
67
- "# @title 3. Define inference functions\n",
 
 
 
68
  "from enum import Enum\n",
69
  "import time\n",
 
 
 
 
 
70
  "\n",
 
71
  "class ContentLabel(Enum):\n",
72
  " SAFE = 0\n",
73
  " HARASSMENT = 1\n",
@@ -76,17 +87,110 @@
76
  " HATE_SPEECH = 4\n",
77
  " SPAM = 5\n",
78
  "\n",
79
- "def predict(text):\n",
80
- " \"\"\"Run inference and return label + confidence\"\"\"\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  " prediction = pipeline.predict([text])[0]\n",
82
  " probs = pipeline.predict_proba([text])[0]\n",
83
  " confidence = max(probs)\n",
84
- " return ContentLabel(prediction), confidence\n",
85
- "\n",
86
- "def check_content(text, age):\n",
87
- " \"\"\"Dual-mode filter\"\"\"\n",
88
- " label, confidence = predict(text)\n",
89
  " \n",
 
90
  " under_13_blocked = [1, 2, 3, 4, 5]\n",
91
  " teen_plus_blocked = [1, 3, 4, 5]\n",
92
  " \n",
@@ -98,12 +202,22 @@
98
  " # Allow reaction swearing for 13+\n",
99
  " if not allowed and label == ContentLabel.SWEARING_REACTION and age >= 13:\n",
100
  " allowed = True\n",
 
 
 
 
 
101
  " \n",
102
  " return {\n",
103
- " \"allowed\": allowed,\n",
104
- " \"label\": label.name,\n",
105
- " \"confidence\": confidence\n",
106
- " }"
 
 
 
 
 
107
  ]
108
  },
109
  {
@@ -112,28 +226,25 @@
112
  "metadata": {},
113
  "outputs": [],
114
  "source": [
115
- "# @title 4. Single inference speed test\n",
116
  "test_text = \"damn that's crazy\"\n",
117
  "\n",
118
  "# Warm up\n",
119
- "_ = predict(test_text)\n",
120
  "\n",
121
  "# Time single inference\n",
122
  "times = []\n",
123
  "for _ in range(100):\n",
124
  " start = time.perf_counter()\n",
125
- " result = predict(test_text)\n",
126
  " end = time.perf_counter()\n",
127
- " times.append((end - start) * 1000) # Convert to ms\n",
128
  "\n",
129
  "avg_time = sum(times) / len(times)\n",
130
- "min_time = min(times)\n",
131
- "max_time = max(times)\n",
132
- "\n",
133
  "print(f\"πŸ“Š Single Inference Speed (100 runs)\")\n",
134
  "print(f\" Average: {avg_time:.3f} ms\")\n",
135
- "print(f\" Min: {min_time:.3f} ms\")\n",
136
- "print(f\" Max: {max_time:.3f} ms\")\n",
137
  "print(f\" Throughput: {1000/avg_time:.1f} inferences/second\")"
138
  ]
139
  },
@@ -143,31 +254,28 @@
143
  "metadata": {},
144
  "outputs": [],
145
  "source": [
146
- "# @title 5. Batch inference speed test\n",
147
- "test_texts = [\n",
148
- " \"that was a great game\",\n",
149
- " \"shit that sucks\",\n",
150
- " \"you're a piece of shit\",\n",
151
- " \"kill yourself\",\n",
152
- " \"i love this song\",\n",
153
- " \"damn that's crazy\",\n",
154
- " \"click here for free robux\",\n",
155
- " \"congratulations\",\n",
156
- "] * 100 # 800 total texts\n",
157
- "\n",
158
- "print(f\"Testing batch of {len(test_texts)} texts...\")\n",
159
- "\n",
160
- "start = time.perf_counter()\n",
161
- "results = [predict(t) for t in test_texts]\n",
162
- "end = time.perf_counter()\n",
163
  "\n",
164
- "total_time = (end - start) * 1000\n",
165
- "avg_per_text = total_time / len(test_texts)\n",
 
166
  "\n",
167
- "print(f\"\\nπŸ“Š Batch Inference Results\")\n",
168
- "print(f\" Total time: {total_time:.1f} ms\")\n",
169
- "print(f\" Average per text: {avg_per_text:.3f} ms\")\n",
170
- "print(f\" Throughput: {len(test_texts)/(total_time/1000):.1f} texts/second\")"
171
  ]
172
  },
173
  {
@@ -176,26 +284,26 @@
176
  "metadata": {},
177
  "outputs": [],
178
  "source": [
179
- "# @title 6. Dual-mode comparison test\n",
180
- "test_cases = [\n",
181
- " (\"that was a great game\", 10),\n",
182
- " (\"that was a great game\", 15),\n",
183
- " (\"shit that sucks\", 10),\n",
184
- " (\"shit that sucks\", 15),\n",
185
- " (\"you're a piece of shit\", 10),\n",
186
- " (\"you're a piece of shit\", 15),\n",
187
- " (\"kill yourself\", 10),\n",
188
- " (\"kill yourself\", 15),\n",
189
  "]\n",
190
  "\n",
191
- "print(\"πŸ“‹ Dual-Mode Filter Results\\n\")\n",
192
- "print(f\"{'Text':<30} {'Age':<6} {'Status':<10} {'Label':<20} {'Conf':<6}\")\n",
193
- "print(\"-\" * 75)\n",
194
- "\n",
195
- "for text, age in test_cases:\n",
196
  " result = check_content(text, age)\n",
197
- " status = \"βœ… ALLOW\" if result[\"allowed\"] else \"❌ BLOCK\"\n",
198
- " print(f\"{text:<30} {age:<6} {status:<10} {result['label']:<20} {result['confidence']:.2f}\")"
 
 
 
199
  ]
200
  },
201
  {
@@ -204,18 +312,48 @@
204
  "metadata": {},
205
  "outputs": [],
206
  "source": [
207
- "# @title 7. Memory usage check\n",
208
- "import sys\n",
 
 
 
 
 
 
 
 
 
209
  "\n",
210
- "# Estimate model size in memory\n",
211
- "model_size = sys.getsizeof(pipeline) / 1024 / 1024\n",
212
- "print(f\"πŸ’Ύ Model memory usage: ~{model_size:.2f} MB\")\n",
 
 
 
 
 
 
 
 
 
213
  "\n",
214
- "# Check if GPU available (Colab usually has CPU only for sklearn)\n",
215
- "import os\n",
216
- "gpu_available = 'COLAB_GPU' in os.environ\n",
217
- "print(f\"πŸ”₯ GPU available: {gpu_available}\")\n",
218
- "print(f\"⚑ Running on: CPU (sklearn uses CPU)\")"
 
 
 
 
 
 
 
 
 
 
 
 
219
  ]
220
  },
221
  {
@@ -225,14 +363,19 @@
225
  "## πŸ“Š Expected Results\n",
226
  "\n",
227
  "On Google Colab (CPU):\n",
228
- "- **Single inference:** ~0.5-2ms\n",
229
- "- **Throughput:** ~500-2000 inferences/second\n",
230
- "- **Memory:** ~5-15MB\n",
 
231
  "\n",
232
  "## πŸ”— Links\n",
233
  "\n",
234
- "- Model: https://huggingface.co/darwinkernelpanic/moderat\n",
235
- "- GitHub: Add your repo here"
 
 
 
 
236
  ]
237
  }
238
  ]
 
18
  "source": [
19
  "# πŸ›‘οΈ moderat - Speed Test & Benchmark\n",
20
  "\n",
21
+ "Test inference speeds for the dual-mode content moderation model with PII detection.\n",
22
  "\n",
23
  "**Model:** [darwinkernelpanic/moderat](https://huggingface.co/darwinkernelpanic/moderat)"
24
  ]
 
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
42
+ "# @title 2. Download model and files from Hugging Face\n",
43
  "from huggingface_hub import hf_hub_download\n",
44
  "import pickle\n",
45
  "\n",
 
51
  " filename=\"moderation_model.pkl\"\n",
52
  ")\n",
53
  "\n",
54
+ "# Download PII extension\n",
55
+ "pii_path = hf_hub_download(\n",
56
+ " repo_id=MODEL_REPO,\n",
57
+ " filename=\"pii_extension.py\"\n",
58
+ ")\n",
59
  "\n",
60
+ "print(f\"βœ… Model and PII extension downloaded from {MODEL_REPO}\")"
61
  ]
62
  },
63
  {
 
66
  "metadata": {},
67
  "outputs": [],
68
  "source": [
69
+ "# @title 3. Import and setup\n",
70
+ "import sys\n",
71
+ "sys.path.insert(0, pii_path.replace('/pii_extension.py', ''))\n",
72
+ "\n",
73
  "from enum import Enum\n",
74
  "import time\n",
75
+ "import re\n",
76
+ "\n",
77
+ "# Load model\n",
78
+ "with open(model_path, 'rb') as f:\n",
79
+ " pipeline = pickle.load(f)\n",
80
  "\n",
81
+ "# Define enums\n",
82
  "class ContentLabel(Enum):\n",
83
  " SAFE = 0\n",
84
  " HARASSMENT = 1\n",
 
87
  " HATE_SPEECH = 4\n",
88
  " SPAM = 5\n",
89
  "\n",
90
+ "class PIILabel(Enum):\n",
91
+ " SAFE = \"safe\"\n",
92
+ " EMAIL = \"email\"\n",
93
+ " PHONE = \"phone\"\n",
94
+ " ADDRESS = \"address\"\n",
95
+ " CREDIT_CARD = \"credit_card\"\n",
96
+ " SSN = \"ssn\"\n",
97
+ " SOCIAL_MEDIA = \"social_media\"\n",
98
+ "\n",
99
+ "print(\"βœ… Setup complete\")"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": null,
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "# @title 4. PII Detector Class\n",
109
+ "class PIIDetector:\n",
110
+ " \"\"\"Detect PII in text\"\"\"\n",
111
+ " \n",
112
+ " def __init__(self):\n",
113
+ " self.email_pattern = re.compile(r'\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b')\n",
114
+ " self.phone_patterns = [\n",
115
+ " re.compile(r'\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b'),\n",
116
+ " re.compile(r'\\b\\(\\d{3}\\)\\s?\\d{3}[-.]?\\d{4}\\b'),\n",
117
+ " re.compile(r'\\b\\d{4}\\s?\\d{3}\\s?\\d{3}\\b'),\n",
118
+ " ]\n",
119
+ " self.social_media_domains = [\n",
120
+ " 'instagram.com', 'instagr.am', 'twitter.com', 'x.com',\n",
121
+ " 'tiktok.com', 'snapchat.com', 'discord.com', 'discord.gg'\n",
122
+ " ]\n",
123
+ " self.grooming_keywords = [\n",
124
+ " 'dm me', 'private chat', 'dont tell your parents', 'secret',\n",
125
+ " 'send me pics', 'our little secret', 'meet up'\n",
126
+ " ]\n",
127
+ " \n",
128
+ " def scan(self, text, age):\n",
129
+ " pii_types = []\n",
130
+ " \n",
131
+ " # Check email\n",
132
+ " if self.email_pattern.search(text):\n",
133
+ " pii_types.append('email')\n",
134
+ " \n",
135
+ " # Check phone\n",
136
+ " for pattern in self.phone_patterns:\n",
137
+ " if pattern.search(text):\n",
138
+ " pii_types.append('phone')\n",
139
+ " break\n",
140
+ " \n",
141
+ " # Check social media\n",
142
+ " text_lower = text.lower()\n",
143
+ " has_social = any(domain in text_lower for domain in self.social_media_domains)\n",
144
+ " has_social = has_social or any(x in text_lower for x in ['instagram', 'snapchat', 'discord', 'tiktok'])\n",
145
+ " \n",
146
+ " if has_social:\n",
147
+ " pii_types.append('social_media')\n",
148
+ " # Check grooming\n",
149
+ " grooming_risk = sum(1 for kw in self.grooming_keywords if kw in text_lower)\n",
150
+ " \n",
151
+ " if age < 13:\n",
152
+ " return {'blocked': True, 'reason': 'Social media not allowed under 13', 'pii': pii_types}\n",
153
+ " elif grooming_risk > 0:\n",
154
+ " return {'blocked': True, 'reason': f'Potential grooming (risk: {grooming_risk})', 'pii': pii_types}\n",
155
+ " \n",
156
+ " if pii_types:\n",
157
+ " return {'blocked': True, 'reason': f'PII detected: {pii_types}', 'pii': pii_types}\n",
158
+ " \n",
159
+ " return {'blocked': False, 'reason': 'No PII', 'pii': []}\n",
160
+ "\n",
161
+ "pii_detector = PIIDetector()\n",
162
+ "print(\"βœ… PII detector ready\")"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "# @title 5. Combined Filter Function\n",
172
+ "def check_content(text, age):\n",
173
+ " \"\"\"\n",
174
+ " Combined content moderation + PII check\n",
175
+ " Returns: {allowed, reason, content_label, pii_result}\n",
176
+ " \"\"\"\n",
177
+ " # Step 1: PII Check\n",
178
+ " pii_result = pii_detector.scan(text, age)\n",
179
+ " if pii_result['blocked']:\n",
180
+ " return {\n",
181
+ " 'allowed': False,\n",
182
+ " 'reason': pii_result['reason'],\n",
183
+ " 'violation': 'PII',\n",
184
+ " 'pii': pii_result['pii']\n",
185
+ " }\n",
186
+ " \n",
187
+ " # Step 2: Content Moderation\n",
188
  " prediction = pipeline.predict([text])[0]\n",
189
  " probs = pipeline.predict_proba([text])[0]\n",
190
  " confidence = max(probs)\n",
191
+ " label = ContentLabel(prediction)\n",
 
 
 
 
192
  " \n",
193
+ " # Age-based rules\n",
194
  " under_13_blocked = [1, 2, 3, 4, 5]\n",
195
  " teen_plus_blocked = [1, 3, 4, 5]\n",
196
  " \n",
 
202
  " # Allow reaction swearing for 13+\n",
203
  " if not allowed and label == ContentLabel.SWEARING_REACTION and age >= 13:\n",
204
  " allowed = True\n",
205
+ " reason = 'Swearing permitted as reaction (13+)'\n",
206
+ " elif not allowed:\n",
207
+ " reason = f'{label.name} detected'\n",
208
+ " else:\n",
209
+ " reason = 'Safe'\n",
210
  " \n",
211
  " return {\n",
212
+ " 'allowed': allowed,\n",
213
+ " 'reason': reason,\n",
214
+ " 'violation': 'CONTENT' if not allowed else None,\n",
215
+ " 'label': label.name,\n",
216
+ " 'confidence': confidence,\n",
217
+ " 'pii': []\n",
218
+ " }\n",
219
+ "\n",
220
+ "print(\"βœ… Combined filter ready\")"
221
  ]
222
  },
223
  {
 
226
  "metadata": {},
227
  "outputs": [],
228
  "source": [
229
+ "# @title 6. Speed Test - Single Inference\n",
230
  "test_text = \"damn that's crazy\"\n",
231
  "\n",
232
  "# Warm up\n",
233
+ "_ = pipeline.predict([test_text])\n",
234
  "\n",
235
  "# Time single inference\n",
236
  "times = []\n",
237
  "for _ in range(100):\n",
238
  " start = time.perf_counter()\n",
239
+ " result = check_content(test_text, 15)\n",
240
  " end = time.perf_counter()\n",
241
+ " times.append((end - start) * 1000)\n",
242
  "\n",
243
  "avg_time = sum(times) / len(times)\n",
 
 
 
244
  "print(f\"πŸ“Š Single Inference Speed (100 runs)\")\n",
245
  "print(f\" Average: {avg_time:.3f} ms\")\n",
246
+ "print(f\" Min: {min(times):.3f} ms\")\n",
247
+ "print(f\" Max: {max(times):.3f} ms\")\n",
248
  "print(f\" Throughput: {1000/avg_time:.1f} inferences/second\")"
249
  ]
250
  },
 
254
  "metadata": {},
255
  "outputs": [],
256
  "source": [
257
+ "# @title 7. Dual-Mode Comparison Test\n",
258
+ "test_cases = [\n",
259
+ " (\"that was a great game\", 10),\n",
260
+ " (\"that was a great game\", 15),\n",
261
+ " (\"shit that sucks\", 10),\n",
262
+ " (\"shit that sucks\", 15),\n",
263
+ " (\"you're a piece of shit\", 15),\n",
264
+ " (\"kill yourself\", 12),\n",
265
+ " (\"My email is test@gmail.com\", 16),\n",
266
+ " (\"Follow me on instagram @user\", 14),\n",
267
+ " (\"DM me privately\", 14),\n",
268
+ " (\"damn that's crazy\", 10),\n",
269
+ "]\n",
 
 
 
 
270
  "\n",
271
+ "print(\"πŸ“‹ Dual-Mode + PII Filter Results\\n\")\n",
272
+ "print(f\"{'Text':<35} {'Age':<6} {'Status':<10} {'Reason':<30}\")\n",
273
+ "print(\"-\" * 85)\n",
274
  "\n",
275
+ "for text, age in test_cases:\n",
276
+ " result = check_content(text, age)\n",
277
+ " status = \"βœ… ALLOW\" if result['allowed'] else \"❌ BLOCK\"\n",
278
+ " print(f\"{text:<35} {age:<6} {status:<10} {result['reason'][:28]:<30}\")"
279
  ]
280
  },
281
  {
 
284
  "metadata": {},
285
  "outputs": [],
286
  "source": [
287
+ "# @title 8. PII Detection Specific Test\n",
288
+ "pii_tests = [\n",
289
+ " (\"Contact me at john@example.com\", 15),\n",
290
+ " (\"Call me 555-123-4567\", 16),\n",
291
+ " (\"I'm at 123 Main Street\", 14),\n",
292
+ " (\"My credit card is 4111-1111-1111-1111\", 15),\n",
293
+ " (\"Follow my instagram @cool\", 10),\n",
294
+ " (\"Follow my instagram @cool\", 15),\n",
295
+ " (\"DM me on snapchat, it's secret\", 15),\n",
296
+ " (\"Check my tiktok\", 16),\n",
297
  "]\n",
298
  "\n",
299
+ "print(\"πŸ”’ PII Detection Results\\n\")\n",
300
+ "for text, age in pii_tests:\n",
 
 
 
301
  " result = check_content(text, age)\n",
302
+ " status = \"βœ…\" if result['allowed'] else \"❌\"\n",
303
+ " pii_info = f\"PII: {result.get('pii', [])}\" if result.get('pii') else \"\"\n",
304
+ " print(f\"{status} Age {age}: {text[:40]}...\")\n",
305
+ " print(f\" β†’ {result['reason']} {pii_info}\")\n",
306
+ " print()"
307
  ]
308
  },
309
  {
 
312
  "metadata": {},
313
  "outputs": [],
314
  "source": [
315
+ "# @title 9. Batch Processing Speed Test\n",
316
+ "batch_texts = [\n",
317
+ " \"that was a great game\",\n",
318
+ " \"shit that sucks\",\n",
319
+ " \"you're awesome\",\n",
320
+ " \"damn good job\",\n",
321
+ " \"My email is test@test.com\",\n",
322
+ " \"Follow me on instagram\",\n",
323
+ " \"kill yourself\",\n",
324
+ " \"nice work\",\n",
325
+ "] * 50 # 400 texts\n",
326
  "\n",
327
+ "ages = [15] * len(batch_texts)\n",
328
+ "\n",
329
+ "print(f\"Processing batch of {len(batch_texts)} texts...\")\n",
330
+ "start = time.perf_counter()\n",
331
+ "results = [check_content(t, a) for t, a in zip(batch_texts, ages)]\n",
332
+ "end = time.perf_counter()\n",
333
+ "\n",
334
+ "total_time = (end - start) * 1000\n",
335
+ "print(f\"\\nπŸ“Š Batch Results\")\n",
336
+ "print(f\" Total time: {total_time:.1f} ms\")\n",
337
+ "print(f\" Average: {total_time/len(batch_texts):.3f} ms/text\")\n",
338
+ "print(f\" Throughput: {len(batch_texts)/(total_time/1000):.1f} texts/sec\")\n",
339
  "\n",
340
+ "allowed = sum(1 for r in results if r['allowed'])\n",
341
+ "blocked = len(results) - allowed\n",
342
+ "print(f\"\\n Allowed: {allowed} | Blocked: {blocked}\")"
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "code",
347
+ "execution_count": null,
348
+ "metadata": {},
349
+ "outputs": [],
350
+ "source": [
351
+ "# @title 10. Memory Usage\n",
352
+ "import sys\n",
353
+ "model_size = sys.getsizeof(pipeline) / 1024 / 1024\n",
354
+ "print(f\"πŸ’Ύ Model memory: ~{model_size:.2f} MB\")\n",
355
+ "print(f\"⚑ Running on: CPU (sklearn)\")\n",
356
+ "print(f\"βœ… PII detection: Regex-based (fast)\")"
357
  ]
358
  },
359
  {
 
363
  "## πŸ“Š Expected Results\n",
364
  "\n",
365
  "On Google Colab (CPU):\n",
366
+ "- **Single inference:** ~1-3ms\n",
367
+ "- **With PII check:** ~2-5ms\n",
368
+ "- **Batch throughput:** ~300-500 texts/second\n",
369
+ "- **Memory:** ~10-20MB\n",
370
  "\n",
371
  "## πŸ”— Links\n",
372
  "\n",
373
+ "- **Model:** https://huggingface.co/darwinkernelpanic/moderat\n",
374
+ "- **Features:**\n",
375
+ " - Content moderation (6 categories)\n",
376
+ " - PII detection (email, phone, address)\n",
377
+ " - Social media protection (age-based)\n",
378
+ " - Grooming detection (13+ mode)"
379
  ]
380
  }
381
  ]