"""
Code Extraction Detector

Detects attempts to extract source code, scripts, or implementation details
from websites or systems. This is a form of intellectual property theft
and reverse engineering.
"""
|
| |
|
| | from __future__ import annotations
|
| |
|
| | import re
|
| | from typing import Any, Dict, Optional
|
| |
|
| | from ..base import ScannerPlugin, ScanResult, PluginMetadata
|
| |
|
| |
|
# Regular expressions that signal a request to extract code or reverse
# engineer an implementation. The detector searches them case-insensitively
# and a single match is enough to raise the "code_extraction" flag.
CODE_EXTRACTION_PATTERNS = [
    # Direct extraction verbs, optionally followed by a code-like object.
    r"\b(extract|pull.*source|get.*source|download.*code)\b",
    (
        r"\b(extract|grab|pull|download|fetch)\b"
        r".*\b(javascript|js|html|css|code|source|implementation)\b"
    ),
    # Requests to understand or reverse engineer how something is built.
    (
        r"\b(understand.*code|understand.*implementation"
        r"|reverse.*engineer|decompile|disassemble)\b"
    ),
    r"\b(understand|analyze)\b.*\b(how.*work|mechanism|implementation)\b",
    # A script/plugin/extension mentioned ahead of an extraction verb.
    r"\b(script|plugin|extension)\b.*\b(extract|pull|grab|download|source)\b",
]
|
| |
|
# Words that indicate the request is about code or implementation details.
# The detector tests each one as a plain substring of the lower-cased text.
CODE_KEYWORDS = {
    # Generic code terms
    "code",
    "source",
    "script",
    "implementation",
    "algorithm",
    "logic",
    "function",
    # Web languages
    "javascript",
    "js",
    "html",
    "css",
    # Front-end frameworks
    "react",
    "vue",
    "angular",
}
|
| |
|
# Words that indicate an external target (a site, product or competitor).
# The detector tests each one as a plain substring of the lower-cased text.
TARGET_KEYWORDS = {
    # Generic targets
    "website",
    "site",
    "application",
    "app",
    "platform",
    # Competitor references
    "competitor",
    "competitor's",
    # Named event platforms
    "eventbrite",
    "ticketmaster",
    "meetup",
}
|
| |
|
| |
|
class CodeExtractionDetector(ScannerPlugin):
    """Detects attempts to extract source code and implementation details.

    ``scan`` runs four independent text heuristics over the LLM context and
    the flattened tool arguments. Each heuristic that fires appends a reason,
    sets a flag and adds a fixed weight to the risk score, which is capped
    at 1.0.
    """

    # Plain substrings, tested against the lower-cased text.
    _REVERSE_ENGINEERING_TERMS = [
        "reverse",
        "decompile",
        "disassemble",
        "how it works",
        "how they work",
    ]

    # Regular expressions, searched case-insensitively. These must go through
    # ``re.search``: treated as literal substrings they would only match text
    # that contains the characters ".*" verbatim.
    _SCRIPT_EXTRACTION_PATTERNS = [
        r"extract.*script",
        r"extract.*component",
        r"pull.*source",
    ]

    def __init__(self):
        """Pass this plugin's metadata to the ``ScannerPlugin`` constructor."""
        super().__init__(
            metadata=PluginMetadata(
                name="CodeExtractionDetector",
                version="1.0.0",
                description="Detects source code extraction and reverse engineering attempts",
                author="SecurityGateway",
            )
        )

    def scan(
        self,
        user_id: Optional[str],
        server_key: str,
        tool: str,
        arguments: Dict[str, Any],
        llm_context: Optional[str] = None,
    ) -> ScanResult:
        """
        Scan for code extraction patterns.

        Detects (flag name, score weight):
        - Code extraction / reverse engineering regexes
          (``code_extraction``, 0.45)
        - A code keyword together with a target keyword
          (``code_extraction_targeted``, 0.50)
        - Reverse engineering terms (``reverse_engineering``, 0.55)
        - Script/component source extraction regexes
          (``script_extraction``, 0.40)

        Args:
            user_id: User identifier (not used by this detector)
            server_key: Server key (not used by this detector)
            tool: Tool name (not used by this detector)
            arguments: Tool arguments; only the values are scanned, dict keys
                are ignored
            llm_context: Optional context, scanned together with the arguments

        Returns:
            ScanResult whose ``detected`` is True when at least one heuristic
            fired, with the summed risk score capped at 1.0.
        """
        context = (llm_context or "") + " " + self._flatten_json(arguments)
        context_lower = context.lower()

        reasons = []
        flags: Dict[str, bool] = {}
        risk_score = 0.0

        # 1. Module-level extraction / reverse engineering regexes.
        if self._contains_pattern(context, CODE_EXTRACTION_PATTERNS):
            reasons.append("Code extraction or reverse engineering attempt detected.")
            flags["code_extraction"] = True
            risk_score += 0.45

        # 2. A code keyword combined with a target keyword.
        # NOTE: these are substring tests, so short keywords such as "js" or
        # "app" also match inside longer words.
        code_found = any(keyword in context_lower for keyword in CODE_KEYWORDS)
        target_found = any(keyword in context_lower for keyword in TARGET_KEYWORDS)

        if code_found and target_found:
            reasons.append("Source code extraction from target website/application detected.")
            flags["code_extraction_targeted"] = True
            risk_score += 0.50

        # 3. Explicit reverse engineering vocabulary (substring tests).
        if any(term in context_lower for term in self._REVERSE_ENGINEERING_TERMS):
            reasons.append("Reverse engineering intent detected.")
            flags["reverse_engineering"] = True
            risk_score += 0.55

        # 4. Script/component extraction. The patterns are regexes, so they
        # are searched with ``re.search`` rather than tested with ``in``.
        if self._contains_pattern(context, self._SCRIPT_EXTRACTION_PATTERNS):
            reasons.append("Script or component source extraction detected.")
            flags["script_extraction"] = True
            risk_score += 0.40

        detected = bool(reasons)

        return ScanResult(
            plugin_name=self.get_metadata().name,
            detected=detected,
            risk_score=min(1.0, risk_score),
            reasons=reasons if reasons else ["No code extraction detected."],
            flags=flags,
        )

    def _flatten_json(self, value: Any) -> str:
        """Flatten nested structures to string for pattern matching.

        Dicts contribute only their values and lists their items, each
        flattened recursively and joined with single spaces. Any other value
        is converted with ``str``.
        """
        if isinstance(value, dict):
            return " ".join(self._flatten_json(v) for v in value.values())
        if isinstance(value, list):
            return " ".join(self._flatten_json(v) for v in value)
        return str(value)

    def _contains_pattern(self, text: str, patterns: list) -> bool:
        """Return True if any regex in ``patterns`` matches ``text``.

        Matching is case-insensitive and uses ``re.search``, so a pattern may
        match anywhere in the text.
        """
        return any(re.search(pat, text, flags=re.IGNORECASE) for pat in patterns)
|
| |
|
| |
|
| |
|
# Module-level detector instance, created when this module is imported.
# NOTE(review): presumably this is the name a plugin loader looks up - confirm
# against the loader before renaming or removing it.
plugin = CodeExtractionDetector()
|
| |
|