# GrowWithTalha's picture
# Upload 62 files
# a83c934 verified
"""Input validation and sanitization utilities
Provides functions for validating and sanitizing user input to prevent
security vulnerabilities like XSS, SQL injection, and invalid data.
"""
import re
from typing import Optional
import bleach
from email_validator import validate_email, EmailNotValidError
# Whitelist of HTML tags that survive sanitization; every other tag is
# handled according to the options passed to bleach.clean().
ALLOWED_TAGS = [
    # Paragraphs and line breaks
    'p', 'br',
    # Inline emphasis
    'strong', 'em', 'u',
    # Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    # Quoted and preformatted text
    'blockquote', 'code', 'pre',
    # Lists
    'ul', 'ol', 'li',
    # Hyperlinks
    'a',
]

# Per-tag whitelist of attributes; tags not listed here keep no attributes.
ALLOWED_ATTRIBUTES = {
    'a': ['href', 'title'],
    'code': ['class'],
}
def sanitize_html(text: str, strip: bool = False) -> str:
    """Clean HTML in user-supplied text to prevent XSS attacks

    Args:
        text: Input text that may contain HTML
        strip: If True, no tags are whitelisted, so all markup is removed.
            If False, only the tags and attributes listed in ALLOWED_TAGS
            and ALLOWED_ATTRIBUTES are kept.

    Returns:
        The cleaned text returned by bleach.clean()
    """
    if not strip:
        # Keep the whitelisted markup; strip=True asks bleach to remove
        # disallowed tags instead of escaping them.
        return bleach.clean(
            text,
            tags=ALLOWED_TAGS,
            attributes=ALLOWED_ATTRIBUTES,
            strip=True,
        )

    # Empty whitelist: every tag is disallowed and therefore stripped.
    return bleach.clean(text, tags=[], strip=True)
def validate_email_address(email: str) -> tuple[bool, Optional[str]]:
    """Check an email address and return its normalized form

    Deliverability (DNS) checks are disabled; only the address itself is
    validated.

    Args:
        email: Email address to validate

    Returns:
        Tuple of (is_valid, normalized_email or None)
    """
    try:
        result = validate_email(email, check_deliverability=False)
    except EmailNotValidError:
        # Rejected by the validator: report failure without a normalized value.
        return False, None
    else:
        return True, result.normalized
def validate_password_strength(password: str) -> tuple[bool, Optional[str]]:
    """Validate password strength

    Requirements, checked in this order (only the first failure is reported):
        - At least 8 characters
        - Contains at least one uppercase letter (A-Z)
        - Contains at least one lowercase letter (a-z)
        - Contains at least one digit
        - Contains at least one special character, i.e. any character that
          is not a letter, digit, or whitespace (underscore counts as special)

    Args:
        password: Password to validate

    Returns:
        Tuple of (is_valid, error_message or None)
    """
    if len(password) < 8:
        return False, "Password must be at least 8 characters long"

    # Each rule pairs a pattern that must match somewhere in the password
    # with the message returned when it does not.
    rules = (
        (r"[A-Z]", "Password must contain at least one uppercase letter"),
        (r"[a-z]", "Password must contain at least one lowercase letter"),
        (r"\d", "Password must contain at least one digit"),
        # Accept any punctuation/symbol rather than a short hard-coded list,
        # so characters such as '-', '+', '=', ';' or '/' also qualify.
        # '_' is listed explicitly because \w already includes it.
        (r"[^\w\s]|_", "Password must contain at least one special character"),
    )
    for pattern, message in rules:
        if not re.search(pattern, password):
            return False, message

    return True, None
def sanitize_thread_id(thread_id: str) -> str:
    """Reduce a user-supplied thread ID to a safe character set

    Args:
        thread_id: Thread ID from user input

    Returns:
        The ID with everything except ASCII letters, digits, hyphens and
        underscores removed, truncated to at most 255 characters
    """
    max_length = 255
    disallowed = re.compile(r"[^a-zA-Z0-9\-_]")

    # Drop disallowed characters first, then truncate, so the length limit
    # applies to the cleaned value.
    cleaned = disallowed.sub("", thread_id)
    return cleaned[:max_length]
def validate_content_length(content: str, max_length: int = 10000) -> tuple[bool, Optional[str]]:
    """Check that content is non-blank and within the length limit

    Args:
        content: Content to validate
        max_length: Maximum allowed length, measured on the unstripped content

    Returns:
        Tuple of (is_valid, error_message or None)
    """
    error: Optional[str] = None

    if not (content and content.strip()):
        # Covers both empty and whitespace-only content.
        error = "Content cannot be empty"
    elif len(content) > max_length:
        error = f"Content exceeds maximum length of {max_length} characters"

    return error is None, error