Spaces:
Sleeping
Sleeping
File size: 4,643 Bytes
abb96d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
"""
Security Module
===============
Input validation and sanitization to prevent abuse and attacks.
Features:
- Input length validation
- Prompt injection detection
- Suspicious pattern detection
- Logging of security violations
Configuration:
- Adjust MAX_INPUT_LENGTH and MIN_INPUT_LENGTH as needed
- Add custom suspicious patterns if needed
"""
import re
import json
from datetime import datetime
from pathlib import Path
from typing import Tuple, Optional
class SecurityValidator:
    """Validate and sanitize user input before it is processed further.

    Checks are applied in order: type, minimum length, maximum length,
    known suspicious phrases (prompt injection, script/template injection)
    and finally the proportion of unusual characters. Inputs rejected by
    the last two checks are appended to a JSON-lines security log.
    """

    # Input length constraints (measured after surrounding whitespace is stripped)
    MAX_INPUT_LENGTH = 2000
    MIN_INPUT_LENGTH = 1

    # Suspicious patterns that might indicate prompt injection or abuse.
    # They are searched case-insensitively and with DOTALL (see validate_input).
    SUSPICIOUS_PATTERNS = [
        r"ignore\s+(previous|all|your)\s+instructions",
        r"system\s*prompt",
        r"you\s+are\s+now",
        r"pretend\s+to\s+be",
        r"act\s+as\s+(a|an)",
        r"<script[^>]*>",
        r"javascript:",
        r"\{\{.*\}\}",  # Template injection
        r"reveal\s+(your|the)\s+(prompt|instructions)",
        r"disregard\s+(previous|all)",
        r"admin\s+mode",
        r"developer\s+mode",
    ]

    # Punctuation that is expected in an ordinary question and therefore
    # never counted as a "special" character.
    _ORDINARY_PUNCTUATION = frozenset(".,;:?!()-'")

    # Inputs whose share of special characters exceeds this are rejected.
    _MAX_SPECIAL_CHAR_RATIO = 0.3

    def __init__(self, log_dir: str = "logs"):
        """Initialize the validator and prepare the security log location.

        Args:
            log_dir: Directory in which ``security.jsonl`` is written. If it
                cannot be created, a ``hickeylab_logs`` directory under the
                system temp directory is used instead.
        """
        self.log_dir = Path(log_dir)
        try:
            self.log_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            import tempfile

            self.log_dir = Path(tempfile.gettempdir()) / "hickeylab_logs"
            try:
                self.log_dir.mkdir(parents=True, exist_ok=True)
            except OSError as e:
                # Logging is best-effort: validation must keep working even
                # when no log directory can be created. Later write attempts
                # are handled in _log_suspicious.
                print(f"Warning: Could not create security log directory: {e}")
        self.security_log = self.log_dir / "security.jsonl"

    def validate_input(
        self,
        user_input: str,
        session_id: str
    ) -> Tuple[bool, str, Optional[str]]:
        """Validate and sanitize user input.

        Args:
            user_input: The user's input text.
            session_id: Unique session identifier, used only for logging.

        Returns:
            Tuple of (is_valid, cleaned_input, error_message):
            - is_valid: True if the input passes all checks.
            - cleaned_input: The whitespace-stripped input, or "" if invalid.
            - error_message: User-facing error message, or None if valid.
        """
        # A missing or non-text value is treated like an empty question
        # instead of raising AttributeError on .strip().
        if not isinstance(user_input, str):
            return False, "", "Please enter a question."

        cleaned = user_input.strip()

        # Check minimum length
        if len(cleaned) < self.MIN_INPUT_LENGTH:
            return False, "", "Please enter a question."

        # Check maximum length
        if len(cleaned) > self.MAX_INPUT_LENGTH:
            return (
                False,
                "",
                f"⚠️ Question too long. Please keep your question under {self.MAX_INPUT_LENGTH} characters. "
                f"(Current: {len(cleaned)} characters)"
            )

        # Check for suspicious patterns. DOTALL lets ".*" span newlines so a
        # template expression split over several lines is still detected.
        search_flags = re.IGNORECASE | re.DOTALL
        for pattern in self.SUSPICIOUS_PATTERNS:
            if re.search(pattern, cleaned, search_flags):
                self._log_suspicious(session_id, cleaned, pattern)
                return (
                    False,
                    "",
                    "⚠️ Your question contains invalid content. Please rephrase and try again."
                )

        # Check for excessive special characters (might indicate injection attempt)
        if self._special_char_ratio(cleaned) > self._MAX_SPECIAL_CHAR_RATIO:
            self._log_suspicious(session_id, cleaned, "excessive_special_chars")
            return (
                False,
                "",
                "⚠️ Your question contains unusual characters. Please use standard text."
            )

        # All checks passed
        return True, cleaned, None

    @classmethod
    def _special_char_ratio(cls, text: str) -> float:
        """Return the fraction of characters in *text* that look unusual.

        Letters and digits of any script, whitespace, combining marks and
        the punctuation in ``_ORDINARY_PUNCTUATION`` are ordinary; every
        other character is counted as special. An empty string yields 0.0.
        """
        import unicodedata

        if not text:
            return 0.0

        special_count = 0
        for ch in text:
            if ch.isalnum() or ch.isspace() or ch in cls._ORDINARY_PUNCTUATION:
                continue
            # Combining marks (accents, vowel signs) belong to the letter
            # they modify, so they are not evidence of an injection attempt.
            if unicodedata.category(ch).startswith("M"):
                continue
            special_count += 1
        return special_count / len(text)

    def _log_suspicious(self, session_id: str, content: str, reason: str) -> None:
        """Append a record of a rejected input to the security log.

        Args:
            session_id: Session identifier; only its first 8 characters are
                stored. ``None`` is stored as an empty string.
            content: The rejected (already stripped) input. Only its length
                and a preview of at most 100 characters are stored.
            reason: The matched pattern or a short reason code.

        Failures to write the log are reported with a warning and otherwise
        ignored so that logging problems never break validation.
        """
        from datetime import timezone

        # Aware "now" with tzinfo dropped: same naive-UTC ISO format that
        # datetime.utcnow() produced, without the deprecated call.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        session_prefix = "" if session_id is None else str(session_id)[:8]

        if len(content) > 100:
            preview = content[:100] + "..."
        else:
            preview = content

        log_entry = {
            "timestamp": timestamp,
            "session_id": session_prefix,
            "content_length": len(content),
            "content_preview": preview,
            "reason": reason
        }
        try:
            with open(self.security_log, "a", encoding="utf-8") as f:
                f.write(json.dumps(log_entry) + "\n")
        except OSError as e:
            print(f"Warning: Could not log security violation: {e}")
|