# AI-Research-Paper-Analyst / tools / injection_scanner.py
# Author: Saleh
# Commit 2447eba — Clean deployment to HuggingFace Space
"""
Prompt Injection Scanner Tool — Detects adversarial prompt injection in text.
Assigned To: Safety Guardian agent ONLY
Reference: system_design.md — Tool 4 (Lines 504-541)
Reference: engineering_guardrails.md — §2 Tool-Call Argument Validation
Key guardrails:
- Checks 10+ adversarial patterns
- Returns error STRINGS, never raises exceptions
- Flags document as suspicious if patterns found
"""
import re
import json
from crewai.tools import tool
@tool
def prompt_injection_scanner_tool(text: str = "") -> str:
    """Scan text for prompt injection attempts. Returns JSON with is_safe flag
    and list of suspicious patterns found. Pass the text to scan as the 'text' argument."""
    # === INPUT VALIDATION ===
    # Guardrail: this tool must always return an error STRING (JSON), never raise.
    if not text or not isinstance(text, str):
        return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty or invalid input"})
    if not text.strip():
        # Whitespace-only input passes the truthiness check above but is still empty.
        return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty text provided"})
    # === INJECTION PATTERN DETECTION ===
    try:
        # Known adversarial phrasings and chat-template control tokens.
        # 10 patterns, matching the module docstring's "Checks 10+ adversarial patterns".
        suspicious_patterns = [
            r"ignore\s+(all\s+)?previous\s+instructions",
            r"disregard\s+(all\s+)?(above|previous)",
            r"forget\s+(everything|all|your\s+instructions)",
            r"new\s+instructions?\s*:",
            r"\[INST\]",
            r"<\|im_start\|>",
            r"<\|system\|>",
            r"override\s+(all\s+)?safety",
            r"jailbreak",
            r"do\s+anything\s+now",  # "DAN"-style jailbreak phrasing
        ]
        findings = []
        # BUG FIX: match case-insensitively against the ORIGINAL text.
        # The previous code lowercased the text and then searched with patterns
        # containing uppercase literals (r"\[INST\]"), so that pattern could
        # never match. re.IGNORECASE is equivalent for the all-lowercase
        # patterns and revives the uppercase one.
        for pattern in suspicious_patterns:
            if re.search(pattern, text, re.IGNORECASE):
                findings.append(pattern)
        is_safe = not findings
        return json.dumps({
            "is_safe": is_safe,
            "suspicious_patterns": findings,
            "patterns_checked": len(suspicious_patterns),
        })
    except Exception as e:
        # Fail-safe: if scanning fails, treat as unsafe
        return json.dumps({
            "is_safe": False,
            "suspicious_patterns": [],
            "error": f"Injection scan failed: {type(e).__name__}: {str(e)}"
        })