darwinkernelpanic
/

moderat

+#!/usr/bin/env python3
+"""
+PII (Personally Identifiable Information) Detection Extension
+Integrates with dual-mode content moderation
+"""
+import re
+from enum import Enum
+from typing import Dict, List, Tuple
class PIILabel(Enum):
    """Labels for the kinds of personal information this module reports.

    Each member's value is the lowercase string form of its name.
    """

    SAFE = "safe"

    # Contact details
    EMAIL = "email"
    PHONE = "phone"
    ADDRESS = "address"

    # Financial / government identifiers
    CREDIT_CARD = "credit_card"
    SSN = "ssn"

    # Online presence
    SOCIAL_MEDIA = "social_media"
    URL = "url"
class PIIDetector:
    """Detect PII in text with context awareness.

    The ``detect_*`` methods return ``(matched_text, start, end)`` tuples
    (``detect_social_media`` appends a fourth ``platform`` element), and
    ``scan`` combines them with age rules into one moderation decision.
    """

    # Deletes straight/curly apostrophes and backticks so that "don't" in a
    # message lines up with the apostrophe-free keyword "dont".
    _APOSTROPHE_TABLE = str.maketrans('', '', "'`\u2018\u2019")

    def __init__(self):
        """Compile every pattern once so the detect methods only run matches."""
        # Email pattern. The TLD class is [A-Za-z]: a "|" inside a character
        # class is a literal pipe, not alternation.
        self.email_pattern = re.compile(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        )
        # Phone patterns (various formats)
        self.phone_patterns = [
            re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'),  # US: 123-456-7890
            # No \b before "(": a boundary there would require a word
            # character directly in front of the parenthesis.
            re.compile(r'\(\d{3}\)\s?\d{3}[-.]?\d{4}\b'),  # (123) 456-7890
            # Either a literal "+" or a word boundary starts the number, so
            # the "+" is kept in the reported match.
            re.compile(r'(?:\+|\b)\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'),  # International
            re.compile(r'\b\d{4}\s?\d{3}\s?\d{3}\b'),  # AU: 0412 345 678
            re.compile(r'\b\d{3}[-.]?\d{4}\b'),  # Short: 555-1234
            re.compile(r'\b\d{7,10}\b'),  # Plain digits 7-10 chars
        ]
        # Address patterns (basic street address detection)
        self.address_patterns = [
            re.compile(r'\b\d+\s+[A-Za-z]+\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\b', re.IGNORECASE),
            re.compile(r'\b(?:PO|P\.O\.)\s*Box\s*\d+\b', re.IGNORECASE),
        ]
        # Credit card (basic pattern - four groups of four digits)
        self.cc_pattern = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b')
        # SSN (US Social Security Number)
        self.ssn_pattern = re.compile(r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b')
        # Social media links/platforms
        self.social_media_domains = [
            'instagram.com', 'instagr.am',
            'twitter.com', 'x.com',
            'tiktok.com',
            'snapchat.com', 'snap.com',
            'discord.com', 'discord.gg',
            'facebook.com', 'fb.com',
            'reddit.com',
            'youtube.com', 'youtu.be',
            'twitch.tv',
            'steamcommunity.com',
            'roblox.com',
        ]
        # Platform mentions such as "instagram @user" or "discord: user".
        # The alias is a whole word (\b on both sides), so ordinary words that
        # merely start with an alias ("ignore", "xylophone") are not flagged.
        # A handle is attached only when introduced by ":" or "@"; a bare
        # platform name still counts as a mention.
        handle = r'(?:(?:\s*:\s*@?|\s*@)\w+)?'
        self.username_patterns = [
            (platform, re.compile(r'\b(?:' + aliases + r')\b' + handle, re.IGNORECASE))
            for platform, aliases in (
                ('instagram', 'instagram|insta|ig'),
                ('twitter', 'twitter|x'),
                ('discord', 'discord'),
                ('snapchat', 'snapchat|snap'),
                ('tiktok', 'tiktok'),
            )
        ]
        # Grooming/suspicious keywords (context for social media sharing).
        # Stored without apostrophes; see detect_grooming_context.
        self.grooming_keywords = [
            'dm me', 'message me privately', 'private chat', 'secret',
            'dont tell your parents', 'our little secret', 'just between us',
            'send me pics', 'send pictures', 'photo of you', 'what do you look like',
            'how old are you', 'where do you live', 'home alone', 'parents gone',
            'meet up', 'meet in person', 'come over', 'visit you',
            'boyfriend', 'girlfriend', 'dating', 'relationship',
            'trust me', 'special friend', 'mature for your age',
            'youre different', 'understand you', 'only one who gets you',
        ]
        # URL pattern; the named group isolates the host so domains can be
        # compared exactly instead of by substring.
        self.url_pattern = re.compile(
            r'https?://(?P<host>[-\w.]+)(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?',
            re.IGNORECASE
        )

    @staticmethod
    def _drop_nested_spans(matches: List[Tuple[str, int, int]]) -> List[Tuple[str, int, int]]:
        """Return matches sorted by position, without duplicates or spans nested in a longer one."""
        # Sort by start, longest first, so a containing span is always seen
        # before anything nested inside it.
        ordered = sorted(set(matches), key=lambda item: (item[1], -item[2]))
        kept = []
        furthest_end = -1
        for item in ordered:
            if item[2] <= furthest_end:
                continue  # lies entirely inside the span that set furthest_end
            kept.append(item)
            furthest_end = item[2]
        return kept

    def detect_emails(self, text: str) -> List[Tuple[str, int, int]]:
        """Find all emails in text as ``(email, start, end)`` tuples."""
        return [(m.group(), m.start(), m.end()) for m in self.email_pattern.finditer(text)]

    def detect_phones(self, text: str) -> List[Tuple[str, int, int]]:
        """Find all phone numbers as ``(number, start, end)`` tuples.

        Several patterns can hit the same digits (for example the short form
        ``123-4567`` inside ``555-123-4567``); each number is reported once,
        using its longest match, in order of position.
        """
        matches = [
            (m.group(), m.start(), m.end())
            for pattern in self.phone_patterns
            for m in pattern.finditer(text)
        ]
        return self._drop_nested_spans(matches)

    def detect_addresses(self, text: str) -> List[Tuple[str, int, int]]:
        """Find street and PO box addresses as ``(address, start, end)`` tuples."""
        return [
            (m.group(), m.start(), m.end())
            for pattern in self.address_patterns
            for m in pattern.finditer(text)
        ]

    def detect_credit_cards(self, text: str) -> List[Tuple[str, int, int]]:
        """Find credit card numbers as ``(number, start, end)`` tuples."""
        matches = []
        for match in self.cc_pattern.finditer(text):
            digit_count = sum(ch.isdigit() for ch in match.group())
            # The current pattern always yields 16 digits; the range check is
            # kept as a guard in case the pattern is widened later.
            if 13 <= digit_count <= 19:
                matches.append((match.group(), match.start(), match.end()))
        return matches

    def detect_ssn(self, text: str) -> List[Tuple[str, int, int]]:
        """Find SSNs as ``(ssn, start, end)`` tuples."""
        return [(m.group(), m.start(), m.end()) for m in self.ssn_pattern.finditer(text)]

    def detect_social_media(self, text: str) -> List[Tuple[str, int, int, str]]:
        """Find social media links and platform mentions.

        Returns:
            ``(matched_text, start, end, platform)`` tuples. For URLs the
            platform is the matching entry of ``social_media_domains``; for
            mentions it is the canonical platform name (e.g. ``"snapchat"``
            for the alias ``snap``).
        """
        matches = []
        reported_url_spans = []
        for url_match in self.url_pattern.finditer(text):
            host = url_match.group('host').lower().rstrip('.')
            for domain in self.social_media_domains:
                # Exact host or a subdomain of it; a plain substring test
                # would treat "netflix.com" as "x.com".
                if host == domain or host.endswith('.' + domain):
                    matches.append((url_match.group(), url_match.start(), url_match.end(), domain))
                    reported_url_spans.append(url_match.span())
                    break
        # Also check for plain mentions like "instagram @username" or "discord: username"
        for platform, pattern in self.username_patterns:
            for match in pattern.finditer(text):
                inside_reported_url = any(
                    start <= match.start() and match.end() <= end
                    for start, end in reported_url_spans
                )
                if inside_reported_url:
                    continue  # the URL entry already covers this text
                matches.append((match.group(), match.start(), match.end(), platform))
        return matches

    def detect_grooming_context(self, text: str) -> Tuple[bool, float, List[str]]:
        """Detect if social media sharing has grooming context.

        Returns:
            ``(is_suspicious, risk_score, found_keywords)`` where the score
            is ``min(number_of_keywords / 3, 1.0)`` and a single keyword is
            enough to be suspicious.
        """
        # Lower-case and drop apostrophes so "Don't tell your parents"
        # matches the stored keyword "dont tell your parents".
        normalized = text.lower().translate(self._APOSTROPHE_TABLE)
        found_keywords = [kw for kw in self.grooming_keywords if kw in normalized]
        # Calculate risk score
        risk_score = min(len(found_keywords) / 3.0, 1.0)  # Max at 3+ keywords
        is_suspicious = risk_score >= 0.33  # 1+ keywords
        return is_suspicious, risk_score, found_keywords

    def scan(self, text: str, age: int) -> Dict:
        """
        Full PII scan with age-appropriate rules

        Email, phone, address, credit card and SSN block the message at any
        age. Social media references are blocked under 13, and at 13+ only
        when grooming keywords are present.

        Returns:
            {
                "has_pii": bool,
                "pii_types": list,      # label values, in detection order
                "details": list,
                "social_media_allowed": bool,
                "grooming_risk": float,
                "grooming_keywords": list,
                "action": "allow" | "block",
                "reason": str,
                "age": int
            }
        """
        pii_found = []
        # A list rather than a set, so the order in the result and in the
        # reason string is the same on every run.
        pii_types = []
        critical_detectors = (
            (PIILabel.EMAIL, self.detect_emails),
            (PIILabel.PHONE, self.detect_phones),
            (PIILabel.ADDRESS, self.detect_addresses),
            (PIILabel.CREDIT_CARD, self.detect_credit_cards),
            (PIILabel.SSN, self.detect_ssn),
        )
        for label, detect in critical_detectors:
            hits = detect(text)
            if hits:
                pii_types.append(label)
            for value, start, end in hits:
                pii_found.append({"type": label.value, "value": value, "start": start, "end": end})
        # Everything collected so far is blocked for all ages.
        critical_pii = list(pii_types)

        # Defaults for a message with nothing detected.
        action = "allow"
        reason = "No PII detected"
        grooming_risk = 0.0
        grooming_keywords = []
        social_media_allowed = True

        # Social media detection
        social_links = self.detect_social_media(text)
        if social_links:
            pii_types.append(PIILabel.SOCIAL_MEDIA)
            for link, start, end, platform in social_links:
                pii_found.append({"type": "social_media", "value": link, "platform": platform, "start": start, "end": end})
            # Check grooming context for social media
            is_grooming, grooming_risk, grooming_keywords = self.detect_grooming_context(text)
            # Rules by age
            if age < 13:
                # Under 13: Block ALL social media sharing
                social_media_allowed = False
                action = "block"
                reason = "Social media sharing not permitted under 13"
            elif is_grooming:
                # 13+: blocked only when grooming signals are present
                social_media_allowed = False
                action = "block"
                reason = f"Potential grooming detected (risk: {grooming_risk:.0%})"
            else:
                action = "allow"
                reason = "Social media permitted for 13+ (no grooming signals)"

        # Critical PII overrides whatever the social media rules decided.
        if critical_pii:
            action = "block"
            reason = f"PII detected: {', '.join(p.value for p in critical_pii)}"

        return {
            "has_pii": len(pii_types) > 0,
            "pii_types": [p.value for p in pii_types],
            "details": pii_found,
            "social_media_allowed": social_media_allowed,
            "grooming_risk": grooming_risk,
            "grooming_keywords": grooming_keywords,
            "action": action,
            "reason": reason,
            "age": age
        }
# Integration with main moderation system
class CombinedModerationFilter:
    """Combines content moderation + PII detection.

    ``check`` runs the PII scan first and only consults the content model
    when the PII scan did not block the message.
    """

    def __init__(self, content_model_path="./moderation_model_v2.pkl"):
        """Load the content model and set up the PII detector.

        Args:
            content_model_path: Path passed to the content moderator's
                ``load`` method.
        """
        # Function-scope import: this module can be imported (e.g. to use
        # PIIDetector on its own) without enhanced_moderation being available.
        from enhanced_moderation import EnhancedContentModerator
        self.content_moderator = EnhancedContentModerator()
        # NOTE(review): the ".pkl" default suggests load() unpickles the
        # file; only point this at trusted model files - confirm.
        self.content_moderator.load(content_model_path)
        self.pii_detector = PIIDetector()
        # Age-based rules: content label values that are blocked per age group.
        self.under_13_blocked_content = [1, 2, 3, 4, 5]  # All except SAFE
        # Label 2 (SWEARING_REACTION) is left out, which is what permits
        # reaction swearing for 13+.
        self.teen_plus_blocked_content = [1, 3, 4, 5]  # Allow SWEARING_REACTION

    def check(self, text: str, age: int) -> Dict:
        """Full check: content + PII.

        Args:
            text: Message to moderate.
            age: Age of the user; 13 and over uses the teen rule set.

        Returns:
            Dict with ``allowed`` (bool), ``violation`` (``"PII"``,
            ``"CONTENT"`` or ``None``), ``pii_details`` (the PII scan
            result), ``content_details`` (label name and confidence, or
            ``None`` when the PII scan blocked first), ``reason`` and ``age``.
        """
        # Step 1: PII Check
        pii_result = self.pii_detector.scan(text, age)
        if pii_result["action"] == "block":
            return {
                "allowed": False,
                "violation": "PII",
                "pii_details": pii_result,
                "content_details": None,
                "reason": pii_result["reason"],
                "age": age
            }

        # Step 2: Content Moderation Check
        content_label, confidence = self.content_moderator.predict(text)
        # The per-age lists are the single source of truth for what is
        # blocked; there is no separate override for individual labels.
        if age >= 13:
            blocked_values = self.teen_plus_blocked_content
        else:
            blocked_values = self.under_13_blocked_content
        content_allowed = content_label.value not in blocked_values

        if content_allowed:
            violation = None
            reason = "Content and PII checks passed"
        else:
            violation = "CONTENT"
            reason = f"{content_label.name} detected"

        return {
            "allowed": content_allowed,
            "violation": violation,
            "pii_details": pii_result,
            "content_details": {
                "label": content_label.name,
                "confidence": confidence
            },
            "reason": reason,
            "age": age
        }
# Example usage
if __name__ == "__main__":
    # Each sample is (message, sender age); the same message appears at
    # different ages to show how the age rules change the outcome.
    samples = (
        ("My email is john@example.com", 15),
        ("Call me at 555-123-4567", 16),
        ("I'm at 123 Main Street", 14),
        ("Follow me on instagram @cooluser", 10),
        ("Follow me on instagram @cooluser", 15),
        ("DM me on instagram, don't tell your parents", 15),
        ("Check my tiktok @user", 14),
        ("Send me pics on snapchat, it's our secret", 13),
    )
    demo_detector = PIIDetector()

    print("PII Detection Tests")
    print("=" * 70)
    for message, sender_age in samples:
        outcome = demo_detector.scan(message, sender_age)
        # Anything other than an explicit "allow" is shown as blocked.
        if outcome["action"] == "allow":
            verdict = "✅ ALLOW"
        else:
            verdict = "❌ BLOCK"
        print(f"\nAge {sender_age}: '{message}'")
        print(f"  {verdict} - {outcome['reason']}")
        # Grooming details are printed only when at least one keyword matched.
        if outcome["grooming_risk"] > 0:
            print(f"  Grooming risk: {outcome['grooming_risk']:.0%}")
            print(f"  Keywords: {outcome['grooming_keywords']}")