import re
import html

# Compiled once at import time so is_suspicious() does not rebuild them per call.
# Every pattern is case-insensitive. These are heuristics for logging/alerting
# only; they are NOT a substitute for parameterized queries or output escaping.
_SUSPICIOUS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"SELECT.*FROM",
        r"DROP.*TABLE",
        r"UNION.*SELECT",
        # The original entry here was an empty pattern, which matches every
        # string and made the detector flag all input. A script-tag check is
        # the presumed intent; it accepts both a raw "<" and the "&lt;" form
        # produced by html.escape().
        r"(?:<|&lt;)\s*script",
        r"OR.*1=1",
        r"INSERT.*INTO",
    )
)


def sanitize_input(text: str) -> str:
    """Return *text* cleaned so it can be treated as literal text.

    Steps, in order:
      1. Remove NUL bytes.
      2. Escape HTML special characters (``&``, ``<``, ``>`` and both quote
         characters) via ``html.escape``.
      3. Collapse every run of whitespace to a single space and strip the
         ends.

    Args:
        text: The raw input value.

    Returns:
        The sanitized string, or ``""`` when *text* is not a ``str``.
    """
    if not isinstance(text, str):
        return ""

    # Remove null bytes.
    text = text.replace("\x00", "")

    # Escape HTML characters so the value renders as text, not markup.
    text = html.escape(text)

    # Normalize whitespace (runs of spaces, tabs, newlines become one space).
    return re.sub(r"\s+", " ", text).strip()


def is_suspicious(text: str) -> bool:
    """Report whether *text* matches a common injection pattern.

    Intended for logging/alerting purposes only. The checks are simple
    case-insensitive regex heuristics (SQL keywords, a script tag, the
    ``OR ... 1=1`` tautology) and can yield both false positives and false
    negatives.

    Args:
        text: The value to inspect.

    Returns:
        ``True`` if any known pattern is found in *text*; ``False`` otherwise,
        including when *text* is not a ``str``.
    """
    # Mirror sanitize_input(): tolerate non-string input instead of letting
    # re.search() raise TypeError.
    if not isinstance(text, str):
        return False

    return any(pattern.search(text) for pattern in _SUSPICIOUS_PATTERNS)