Spaces:

AISA-Framework
/

AI-Research-Paper-Analyst

Sleeping

AI-Research-Paper-Analyst / tools /injection_scanner.py

Saleh

Clean deployment to HuggingFace Space

2447eba 21 days ago

2.16 kB

	"""
	Prompt Injection Scanner Tool — Detects adversarial prompt injection in text.

	Assigned To: Safety Guardian agent ONLY
	Reference: system_design.md — Tool 4 (Lines 504-541)
	Reference: engineering_guardrails.md — §2 Tool-Call Argument Validation

	Key guardrails:
	- Checks 10+ adversarial patterns
	- Returns error STRINGS, never raises exceptions
	- Flags document as suspicious if patterns found
	"""

	import re
	import json
	from crewai.tools import tool


	@tool
	def prompt_injection_scanner_tool(text: str = "") -> str:
	"""Scan text for prompt injection attempts. Returns JSON with is_safe flag
	and list of suspicious patterns found. Pass the text to scan as the 'text' argument."""

	# === INPUT VALIDATION ===
	if not text or not isinstance(text, str):
	return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty or invalid input"})

	if len(text.strip()) == 0:
	return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty text provided"})

	# === INJECTION PATTERN DETECTION ===
	try:
	suspicious_patterns = [
	r"ignore\s+(all\s+)?previous\s+instructions",
	r"disregard\s+(all\s+)?(above\|previous)",
	r"forget\s+(everything\|all\|your\s+instructions)",
	r"new\s+instructions?\s*:",
	r"\[INST\]",
	r"<\\|im_start\\|>",
	r"<\\|system\\|>",
	r"override\s+(all\s+)?safety",
	r"jailbreak",
	]

	findings = []
	text_lower = text.lower()

	for pattern in suspicious_patterns:
	if re.search(pattern, text_lower):
	findings.append(pattern)

	is_safe = len(findings) == 0

	return json.dumps({
	"is_safe": is_safe,
	"suspicious_patterns": findings,
	"patterns_checked": len(suspicious_patterns),
	})

	except Exception as e:
	# Fail-safe: if scanning fails, treat as unsafe
	return json.dumps({
	"is_safe": False,
	"suspicious_patterns": [],
	"error": f"Injection scan failed: {type(e).__name__}: {str(e)}"
	})