import re
import html
def sanitize_input(text: str) -> str:
    """Return *text* cleaned up so it can be handled as plain literal text.

    Processing order:

    1. Null bytes (``\\x00``) are dropped.
    2. HTML-significant characters are escaped with ``html.escape``
       (which also escapes both quote characters by default).
    3. Every run of whitespace is collapsed to a single space, and
       leading/trailing whitespace is trimmed.

    Anything that is not a ``str`` yields an empty string instead of
    raising.
    """
    if isinstance(text, str):
        without_nulls = text.replace("\x00", "")
        # Escape before collapsing whitespace; escaping never introduces
        # whitespace, so the result is already in its final form afterwards.
        escaped = html.escape(without_nulls)
        collapsed = re.sub(r'\s+', ' ', escaped)
        return collapsed.strip()
    return ""
# Compiled once at import time. IGNORECASE replaces the per-pattern "(?i)"
# prefix; DOTALL lets ".*" span newlines so multi-line payloads are caught.
# Keywords are wrapped in \b so they do not fire inside ordinary words
# (e.g. "OR" inside "for", "SELECT" inside "selected").
_SUSPICIOUS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE | re.DOTALL)
    for pattern in (
        r"\bSELECT\b.*\bFROM\b",
        r"\bDROP\b.*\bTABLE\b",
        r"\bUNION\b.*\bSELECT\b",
        # Opening script tag, raw or already HTML-escaped ("&lt;script").
        # NOTE(review): this entry was an empty pattern that matched every
        # string; a script-tag check is assumed from the docstring - confirm.
        r"(?:<|&lt;)\s*script\b",
        r"\bOR\b.*1\s*=\s*1",
        r"\bINSERT\b.*\bINTO\b",
    )
)


def is_suspicious(text: str) -> bool:
    """Report whether *text* contains a common injection pattern.

    This is a heuristic meant for logging/alerting only; it is not a
    security control and must not replace parameterized queries or
    output escaping.

    Args:
        text: The string to inspect.

    Returns:
        ``True`` if any known SQL or script-injection pattern is found,
        ``False`` otherwise. Non-string input (including ``None``) is
        reported as not suspicious rather than raising.
    """
    if not isinstance(text, str):
        return False
    return any(pattern.search(text) for pattern in _SUSPICIOUS_PATTERNS)