""" Code Extraction Detector Detects attempts to extract source code, scripts, or implementation details from websites or systems. This is a form of intellectual property theft and reverse engineering. """ from __future__ import annotations import re from typing import Any, Dict, Optional from ..base import ScannerPlugin, ScanResult, PluginMetadata CODE_EXTRACTION_PATTERNS = [ # Code extraction intent r"\b(extract|pull.*source|get.*source|download.*code)\b", r"\b(extract|grab|pull|download|fetch)\b.*\b(javascript|js|html|css|code|source|implementation)\b", # Reverse engineering intent r"\b(understand.*code|understand.*implementation|reverse.*engineer|decompile|disassemble)\b", r"\b(understand|analyze)\b.*\b(how.*work|mechanism|implementation)\b", # Script harvesting r"\b(script|plugin|extension)\b.*\b(extract|pull|grab|download|source)\b", ] CODE_KEYWORDS = { "javascript", "js", "code", "source", "script", "html", "css", "react", "vue", "angular", "implementation", "algorithm", "logic", "function" } TARGET_KEYWORDS = { "website", "site", "eventbrite", "ticketmaster", "meetup", "competitor", "competitor's", "application", "app", "platform" } class CodeExtractionDetector(ScannerPlugin): """Detects attempts to extract source code and implementation details.""" def __init__(self): super().__init__( metadata=PluginMetadata( name="CodeExtractionDetector", version="1.0.0", description="Detects source code extraction and reverse engineering attempts", author="SecurityGateway", ) ) def scan( self, user_id: Optional[str], server_key: str, tool: str, arguments: Dict[str, Any], llm_context: Optional[str] = None, ) -> ScanResult: """ Scan for code extraction patterns. Detects: - Source code extraction from websites - Script/plugin harvesting - Reverse engineering intent - Implementation detail extraction Args: user_id: User identifier server_key: Server key tool: Tool name arguments: Tool arguments llm_context: Optional context Returns: ScanResult with code extraction detection """ context = (llm_context or "") + " " + self._flatten_json(arguments) context_lower = context.lower() reasons = [] flags = {} risk_score = 0.0 # 1) Pattern matching for code extraction if self._contains_pattern(context, CODE_EXTRACTION_PATTERNS): reasons.append("Code extraction or reverse engineering attempt detected.") flags["code_extraction"] = True risk_score += 0.45 # 2) Keyword combination: code + target code_found = any(keyword in context_lower for keyword in CODE_KEYWORDS) target_found = any(keyword in context_lower for keyword in TARGET_KEYWORDS) if code_found and target_found: reasons.append("Source code extraction from target website/application detected.") flags["code_extraction_targeted"] = True risk_score += 0.50 # 3) Reverse engineering intent if any(word in context_lower for word in ["reverse", "decompile", "disassemble", "how it works", "how they work"]): reasons.append("Reverse engineering intent detected.") flags["reverse_engineering"] = True risk_score += 0.55 # 4) Script/component extraction if any(word in context_lower for word in ["extract.*script", "extract.*component", "pull.*source"]): reasons.append("Script or component source extraction detected.") flags["script_extraction"] = True risk_score += 0.40 detected = bool(reasons) return ScanResult( plugin_name=self.get_metadata().name, detected=detected, risk_score=min(1.0, risk_score), reasons=reasons if reasons else ["No code extraction detected."], flags=flags, ) def _flatten_json(self, value: Any) -> str: """Flatten nested structures to string for pattern matching.""" if isinstance(value, dict): return " ".join(self._flatten_json(v) for v in value.values()) if isinstance(value, list): return " ".join(self._flatten_json(v) for v in value) return str(value) def _contains_pattern(self, text: str, patterns: list) -> bool: """Check if text matches any pattern.""" for pat in patterns: if re.search(pat, text, flags=re.IGNORECASE): return True return False # Export as module-level plugin for auto-loading plugin = CodeExtractionDetector()