""" Security Module =============== Input validation and sanitization to prevent abuse and attacks. Features: - Input length validation - Prompt injection detection - Suspicious pattern detection - Logging of security violations Configuration: - Adjust MAX_INPUT_LENGTH and MIN_INPUT_LENGTH as needed - Add custom suspicious patterns if needed """ import re import json from datetime import datetime from pathlib import Path from typing import Tuple, Optional class SecurityValidator: """Validates and sanitizes user input.""" # Input length constraints MAX_INPUT_LENGTH = 2000 MIN_INPUT_LENGTH = 1 # Suspicious patterns that might indicate prompt injection or abuse SUSPICIOUS_PATTERNS = [ r"ignore\s+(previous|all|your)\s+instructions", r"system\s*prompt", r"you\s+are\s+now", r"pretend\s+to\s+be", r"act\s+as\s+(a|an)", r"]*>", r"javascript:", r"\{\{.*\}\}", # Template injection r"reveal\s+(your|the)\s+(prompt|instructions)", r"disregard\s+(previous|all)", r"admin\s+mode", r"developer\s+mode", ] def __init__(self, log_dir: str = "logs"): """Initialize security validator.""" self.log_dir = Path(log_dir) try: self.log_dir.mkdir(parents=True, exist_ok=True) except (PermissionError, OSError): import tempfile self.log_dir = Path(tempfile.gettempdir()) / "hickeylab_logs" self.log_dir.mkdir(parents=True, exist_ok=True) self.security_log = self.log_dir / "security.jsonl" def validate_input( self, user_input: str, session_id: str ) -> Tuple[bool, str, Optional[str]]: """ Validate and sanitize user input. Args: user_input: The user's input text session_id: Unique session identifier for logging Returns: Tuple of (is_valid, cleaned_input, error_message) - is_valid: True if input passes all checks - cleaned_input: The cleaned/trimmed input - error_message: User-facing error message if invalid """ # Strip whitespace cleaned = user_input.strip() # Check minimum length if len(cleaned) < self.MIN_INPUT_LENGTH: return False, "", "Please enter a question." # Check maximum length if len(cleaned) > self.MAX_INPUT_LENGTH: return ( False, "", f"⚠️ Question too long. Please keep your question under {self.MAX_INPUT_LENGTH} characters. " f"(Current: {len(cleaned)} characters)" ) # Check for suspicious patterns for pattern in self.SUSPICIOUS_PATTERNS: if re.search(pattern, cleaned, re.IGNORECASE): self._log_suspicious(session_id, cleaned, pattern) return ( False, "", "⚠️ Your question contains invalid content. Please rephrase and try again." ) # Check for excessive special characters (might indicate injection attempt) special_char_ratio = len(re.findall(r"[^a-zA-Z0-9\s.,;:?!()\-']", cleaned)) / max(len(cleaned), 1) if special_char_ratio > 0.3: # More than 30% special characters self._log_suspicious(session_id, cleaned, "excessive_special_chars") return ( False, "", "⚠️ Your question contains unusual characters. Please use standard text." ) # All checks passed return True, cleaned, None def _log_suspicious(self, session_id: str, content: str, reason: str) -> None: """Log suspicious input for security review.""" log_entry = { "timestamp": datetime.utcnow().isoformat(), "session_id": session_id[:8] if len(session_id) >= 8 else session_id, "content_length": len(content), "content_preview": content[:100] + "..." if len(content) > 100 else content, "reason": reason } try: with open(self.security_log, "a", encoding="utf-8") as f: f.write(json.dumps(log_entry) + "\n") except (IOError, OSError) as e: print(f"Warning: Could not log security violation: {e}")