Spaces:

MCP-1st-Birthday
/

Eventure-Project-Overview

Sleeping

App Files Files Community

Eventure-Project-Overview / security_gateway /plugins /builtin /code_extraction.py

yuki-sui

Upload 169 files

ed71b0e verified 3 months ago

raw

history blame contribute delete

5.05 kB

	"""
	Code Extraction Detector

	Detects attempts to extract source code, scripts, or implementation details
	from websites or systems. This is a form of intellectual property theft
	and reverse engineering.
	"""

	from __future__ import annotations

	import re
	from typing import Any, Dict, Optional

	from ..base import ScannerPlugin, ScanResult, PluginMetadata


	# Regular expressions describing code-extraction / reverse-engineering intent.
	# The detector searches them case-insensitively against the flattened request text.
	#
	# Alternation must be written with a bare "|": an escaped "\|" matches a literal
	# pipe character, which would turn every group below into a single string that
	# never occurs in ordinary text.
	CODE_EXTRACTION_PATTERNS = [
	    # Code extraction intent
	    r"\b(extract|pull.source|get.source|download.*code)\b",
	    r"\b(extract|grab|pull|download|fetch)\b.*\b(javascript|js|html|css|code|source|implementation)\b",

	    # Reverse engineering intent
	    r"\b(understand.code|understand.implementation|reverse.*engineer|decompile|disassemble)\b",
	    # ".*" between the two groups (as in the other two-group patterns) so that
	    # intervening words are allowed, e.g. "analyze how they work".
	    r"\b(understand|analyze)\b.*\b(how.*work|mechanism|implementation)\b",

	    # Script harvesting
	    r"\b(script|plugin|extension)\b.*\b(extract|pull|grab|download|source)\b",
	]

	# Lower-case terms that indicate the request is about code or implementation.
	CODE_KEYWORDS = {
	    "javascript", "js", "code", "source", "script",
	    "html", "css", "react", "vue", "angular",
	    "implementation", "algorithm", "logic", "function",
	}

	# Lower-case terms that indicate a third-party site or application is the target.
	TARGET_KEYWORDS = {
	    "website", "site", "eventbrite", "ticketmaster", "meetup",
	    "competitor", "competitor's", "application", "app", "platform",
	}


	class CodeExtractionDetector(ScannerPlugin):
	    """Detects attempts to extract source code and implementation details.

	    The detector flattens the tool arguments (plus any LLM context) into a
	    single string and runs four independent heuristics over it. Each
	    heuristic that fires contributes a reason, a flag and a fixed amount of
	    risk; the total risk is capped at 1.0.
	    """

	    # Literal substrings (matched against the lower-cased text) that signal
	    # reverse-engineering intent.
	    _REVERSE_ENGINEERING_TERMS = (
	        "reverse", "decompile", "disassemble", "how it works", "how they work",
	    )

	    # Regular expressions that signal script/component source extraction.
	    # These contain regex metacharacters, so they have to be searched as
	    # patterns; a plain substring test would look for the literal text
	    # "pull.*source" and never fire on real input.
	    _SCRIPT_EXTRACTION_PATTERNS = (
	        r"extract.script",
	        r"extract.component",
	        r"pull.*source",
	    )

	    def __init__(self):
	        """Register the plugin metadata with the base class."""
	        super().__init__(
	            metadata=PluginMetadata(
	                name="CodeExtractionDetector",
	                version="1.0.0",
	                description="Detects source code extraction and reverse engineering attempts",
	                author="SecurityGateway",
	            )
	        )

	    def scan(
	        self,
	        user_id: Optional[str],
	        server_key: str,
	        tool: str,
	        arguments: Dict[str, Any],
	        llm_context: Optional[str] = None,
	    ) -> ScanResult:
	        """
	        Scan for code extraction patterns.

	        Detects:
	        - Source code extraction from websites
	        - Script/plugin harvesting
	        - Reverse engineering intent
	        - Implementation detail extraction

	        Args:
	            user_id: User identifier (not consulted by this detector).
	            server_key: Server key (not consulted by this detector).
	            tool: Tool name (not consulted by this detector).
	            arguments: Tool arguments; all nested values are flattened to text.
	            llm_context: Optional free-text context, scanned together with
	                the flattened arguments.

	        Returns:
	            ScanResult whose ``detected`` is True when at least one heuristic
	            fired, ``risk_score`` is the sum of the fired heuristics' weights
	            capped at 1.0, ``reasons`` lists one message per fired heuristic
	            (or a single "nothing found" message), and ``flags`` holds one
	            boolean entry per fired heuristic.
	        """
	        context = (llm_context or "") + " " + self._flatten_json(arguments)
	        context_lower = context.lower()

	        reasons = []
	        flags = {}
	        risk_score = 0.0

	        # 1) Pattern matching for code extraction
	        if self._contains_pattern(context, CODE_EXTRACTION_PATTERNS):
	            reasons.append("Code extraction or reverse engineering attempt detected.")
	            flags["code_extraction"] = True
	            risk_score += 0.45

	        # 2) Keyword combination: code + target
	        # NOTE: this is plain substring containment, so short keywords such as
	        # "js" or "app" also match inside longer words ("json", "happy").
	        code_found = any(keyword in context_lower for keyword in CODE_KEYWORDS)
	        target_found = any(keyword in context_lower for keyword in TARGET_KEYWORDS)

	        if code_found and target_found:
	            reasons.append("Source code extraction from target website/application detected.")
	            flags["code_extraction_targeted"] = True
	            risk_score += 0.50

	        # 3) Reverse engineering intent
	        if any(term in context_lower for term in self._REVERSE_ENGINEERING_TERMS):
	            reasons.append("Reverse engineering intent detected.")
	            flags["reverse_engineering"] = True
	            risk_score += 0.55

	        # 4) Script/component extraction (regex search, case-insensitive)
	        if self._contains_pattern(context, self._SCRIPT_EXTRACTION_PATTERNS):
	            reasons.append("Script or component source extraction detected.")
	            flags["script_extraction"] = True
	            risk_score += 0.40

	        detected = bool(reasons)

	        return ScanResult(
	            plugin_name=self.get_metadata().name,
	            detected=detected,
	            # Individual weights can add up to more than 1.0; clamp the total.
	            risk_score=min(1.0, risk_score),
	            reasons=reasons if reasons else ["No code extraction detected."],
	            flags=flags,
	        )

	    def _flatten_json(self, value: Any) -> str:
	        """Flatten nested structures to string for pattern matching.

	        Dict values and list items are flattened recursively and joined with
	        single spaces (dict keys are dropped); any other value is converted
	        with ``str()``.
	        """
	        if isinstance(value, dict):
	            return " ".join(self._flatten_json(v) for v in value.values())
	        if isinstance(value, list):
	            return " ".join(self._flatten_json(v) for v in value)
	        return str(value)

	    def _contains_pattern(self, text: str, patterns: list) -> bool:
	        """Return True if any regex in ``patterns`` is found in ``text``.

	        Matching uses ``re.search`` with ``re.IGNORECASE``; an empty
	        ``patterns`` collection yields False.
	        """
	        return any(re.search(pat, text, flags=re.IGNORECASE) for pat in patterns)


	# Export as module-level plugin for auto-loading.
	# The detector is instantiated once, when this module is imported.
	# NOTE(review): presumably the plugin loader looks up the module attribute
	# named `plugin` — confirm against the loader before renaming it.
	plugin = CodeExtractionDetector()