""" Prompt Injection Scanner Tool โ€” Detects adversarial prompt injection in text. Assigned To: Safety Guardian agent ONLY Reference: system_design.md โ€” Tool 4 (Lines 504-541) Reference: engineering_guardrails.md โ€” ยง2 Tool-Call Argument Validation Key guardrails: - Checks 10+ adversarial patterns - Returns error STRINGS, never raises exceptions - Flags document as suspicious if patterns found """ import re import json from crewai.tools import tool @tool def prompt_injection_scanner_tool(text: str = "") -> str: """Scan text for prompt injection attempts. Returns JSON with is_safe flag and list of suspicious patterns found. Pass the text to scan as the 'text' argument.""" # === INPUT VALIDATION === if not text or not isinstance(text, str): return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty or invalid input"}) if len(text.strip()) == 0: return json.dumps({"is_safe": False, "suspicious_patterns": [], "error": "Empty text provided"}) # === INJECTION PATTERN DETECTION === try: suspicious_patterns = [ r"ignore\s+(all\s+)?previous\s+instructions", r"disregard\s+(all\s+)?(above|previous)", r"forget\s+(everything|all|your\s+instructions)", r"new\s+instructions?\s*:", r"\[INST\]", r"<\|im_start\|>", r"<\|system\|>", r"override\s+(all\s+)?safety", r"jailbreak", ] findings = [] text_lower = text.lower() for pattern in suspicious_patterns: if re.search(pattern, text_lower): findings.append(pattern) is_safe = len(findings) == 0 return json.dumps({ "is_safe": is_safe, "suspicious_patterns": findings, "patterns_checked": len(suspicious_patterns), }) except Exception as e: # Fail-safe: if scanning fails, treat as unsafe return json.dumps({ "is_safe": False, "suspicious_patterns": [], "error": f"Injection scan failed: {type(e).__name__}: {str(e)}" })