File size: 5,051 Bytes
ed71b0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""

Code Extraction Detector



Detects attempts to extract source code, scripts, or implementation details

from websites or systems. This is a form of intellectual property theft

and reverse engineering.

"""

from __future__ import annotations

import re
from typing import Any, Dict, Optional

from ..base import ScannerPlugin, ScanResult, PluginMetadata


# Regular expressions describing code-extraction phrasing. Each entry is a
# complete pattern; the longer ones are split across adjacent raw-string
# literals (joined by the compiler) purely for readability.
CODE_EXTRACTION_PATTERNS = [
    # -- Extraction intent: a bare verb, or a verb followed later by a code noun.
    r"\b(extract|pull.*source|get.*source|download.*code)\b",
    (
        r"\b(extract|grab|pull|download|fetch)\b"
        r".*"
        r"\b(javascript|js|html|css|code|source|implementation)\b"
    ),
    # -- Reverse-engineering intent.
    (
        r"\b("
        r"understand.*code"
        r"|understand.*implementation"
        r"|reverse.*engineer"
        r"|decompile"
        r"|disassemble"
        r")\b"
    ),
    (
        r"\b(understand|analyze)\b"
        r".*"
        r"\b(how.*work|mechanism|implementation)\b"
    ),
    # -- Script harvesting: an artefact noun followed later by an extraction verb.
    (
        r"\b(script|plugin|extension)\b"
        r".*"
        r"\b(extract|pull|grab|download|source)\b"
    ),
]

# Words that indicate the request is about code or implementation details.
CODE_KEYWORDS = {
    "algorithm",
    "angular",
    "code",
    "css",
    "function",
    "html",
    "implementation",
    "javascript",
    "js",
    "logic",
    "react",
    "script",
    "source",
    "vue",
}

# Words that indicate a website / application / named platform is the target.
TARGET_KEYWORDS = {
    "app",
    "application",
    "competitor",
    "competitor's",
    "eventbrite",
    "meetup",
    "platform",
    "site",
    "ticketmaster",
    "website",
}


class CodeExtractionDetector(ScannerPlugin):
    """Detects attempts to extract source code and implementation details.

    ``scan`` runs four independent checks over the flattened tool arguments
    plus the optional LLM context. Every check that fires adds a reason, a
    flag and a fixed weight to the risk score, which is capped at 1.0.
    """

    # Literal substrings that signal reverse-engineering intent (check 3).
    _REVERSE_ENGINEERING_TERMS = (
        "reverse",
        "decompile",
        "disassemble",
        "how it works",
        "how they work",
    )

    # Regular expressions for script/component source extraction (check 4).
    # These contain ``.*`` and therefore must be evaluated with ``re.search``;
    # a plain substring test would only match text that literally contains
    # the characters ".*".
    _SCRIPT_EXTRACTION_PATTERNS = (
        r"extract.*script",
        r"extract.*component",
        r"pull.*source",
    )

    def __init__(self):
        super().__init__(
            metadata=PluginMetadata(
                name="CodeExtractionDetector",
                version="1.0.0",
                description="Detects source code extraction and reverse engineering attempts",
                author="SecurityGateway",
            )
        )

    def scan(
        self,
        user_id: Optional[str],
        server_key: str,
        tool: str,
        arguments: Dict[str, Any],
        llm_context: Optional[str] = None,
    ) -> ScanResult:
        """Scan a tool call for code extraction patterns.

        Detects:
        - Source code extraction from websites
        - Script/plugin harvesting
        - Reverse engineering intent
        - Implementation detail extraction

        Args:
            user_id: User identifier (not consulted by this detector).
            server_key: Server key (not consulted by this detector).
            tool: Tool name (not consulted by this detector).
            arguments: Tool arguments; all nested values are flattened to
                text and scanned.
            llm_context: Optional free-text context, scanned together with
                the flattened arguments.

        Returns:
            ScanResult whose ``detected`` is true when at least one check
            fired, with the accumulated ``risk_score`` (capped at 1.0), the
            human-readable ``reasons`` and the per-check ``flags``.
        """
        context = (llm_context or "") + " " + self._flatten_json(arguments)
        context_lower = context.lower()

        reasons = []
        flags = {}
        risk_score = 0.0

        # 1) Pattern matching for code extraction
        if self._contains_pattern(context, CODE_EXTRACTION_PATTERNS):
            reasons.append("Code extraction or reverse engineering attempt detected.")
            flags["code_extraction"] = True
            risk_score += 0.45

        # 2) Keyword combination: code + target
        # NOTE(review): these are substring tests, so short keywords also hit
        # inside longer words (e.g. "js" in "json", "app" in "happy").
        code_found = any(keyword in context_lower for keyword in CODE_KEYWORDS)
        target_found = any(keyword in context_lower for keyword in TARGET_KEYWORDS)

        if code_found and target_found:
            reasons.append("Source code extraction from target website/application detected.")
            flags["code_extraction_targeted"] = True
            risk_score += 0.50

        # 3) Reverse engineering intent (literal substring match)
        if any(term in context_lower for term in self._REVERSE_ENGINEERING_TERMS):
            reasons.append("Reverse engineering intent detected.")
            flags["reverse_engineering"] = True
            risk_score += 0.55

        # 4) Script/component extraction
        # Evaluated as regular expressions: the previous substring test could
        # never match these wildcard patterns against ordinary text.
        if any(
            re.search(pattern, context_lower)
            for pattern in self._SCRIPT_EXTRACTION_PATTERNS
        ):
            reasons.append("Script or component source extraction detected.")
            flags["script_extraction"] = True
            risk_score += 0.40

        detected = bool(reasons)

        return ScanResult(
            plugin_name=self.get_metadata().name,
            detected=detected,
            risk_score=min(1.0, risk_score),
            reasons=reasons if reasons else ["No code extraction detected."],
            flags=flags,
        )

    def _flatten_json(self, value: Any) -> str:
        """Flatten nested structures to a single string for pattern matching.

        Dict values (keys are ignored) and list items are flattened
        recursively and joined with single spaces; any other value is
        converted with ``str``.
        """
        if isinstance(value, dict):
            return " ".join(self._flatten_json(v) for v in value.values())
        if isinstance(value, list):
            return " ".join(self._flatten_json(v) for v in value)
        return str(value)

    def _contains_pattern(self, text: str, patterns: list) -> bool:
        """Return True if any regex in ``patterns`` matches ``text``.

        Matching is case-insensitive and unanchored (``re.search``).
        """
        return any(re.search(pat, text, flags=re.IGNORECASE) for pat in patterns)


# Export as module-level plugin for auto-loading.
# The instance is created at import time, so CodeExtractionDetector.__init__
# runs whenever this module is imported.
# NOTE(review): the loader is not visible in this file; presumably it looks up
# the module attribute named `plugin` — confirm before renaming.
plugin = CodeExtractionDetector()