File size: 3,581 Bytes
a83c934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""Input validation and sanitization utilities

Provides functions for validating and sanitizing user input to prevent
security vulnerabilities like XSS, SQL injection, and invalid data.
"""
import re
from typing import Optional
import bleach
from email_validator import validate_email, EmailNotValidError


# Whitelist used when sanitizing rich-text content: only these tags, and only
# the listed attributes on them, are passed to bleach as permitted markup.
ALLOWED_TAGS = [
    # Inline and paragraph-level formatting
    "p", "br", "strong", "em", "u",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Quotations and code
    "blockquote", "code", "pre",
    # Lists
    "ul", "ol", "li",
    # Links
    "a",
]
ALLOWED_ATTRIBUTES = {
    "a": ["href", "title"],
    "code": ["class"],
}


def sanitize_html(text: str, strip: bool = False) -> str:
    """Clean HTML content with bleach to guard against XSS

    Args:
        text: Input text that may contain HTML
        strip: If True, clean with an empty tag whitelist so no tags are
            permitted; otherwise clean against ALLOWED_TAGS and
            ALLOWED_ATTRIBUTES

    Returns:
        The result of bleach.clean for the selected whitelist
    """
    # Start from the "no tags permitted" configuration; bleach is always
    # called with strip=True in both modes.
    clean_options = {"tags": [], "strip": True}

    if not strip:
        # Regular sanitizing: swap in the module-level whitelists.
        clean_options["tags"] = ALLOWED_TAGS
        clean_options["attributes"] = ALLOWED_ATTRIBUTES

    return bleach.clean(text, **clean_options)


def validate_email_address(email: str) -> tuple[bool, Optional[str]]:
    """Check an email address and return its normalized form

    Args:
        email: Email address to validate

    Returns:
        Tuple of (is_valid, normalized_email or None)
    """
    try:
        # Deliverability checking is switched off for this call
        checked = validate_email(email, check_deliverability=False)
    except EmailNotValidError:
        return False, None
    else:
        return True, checked.normalized


def validate_password_strength(password: str) -> tuple[bool, Optional[str]]:
    """Validate password strength

    Requirements:
    - At least 8 characters
    - Contains at least one uppercase letter (A-Z)
    - Contains at least one lowercase letter (a-z)
    - Contains at least one digit
    - Contains at least one special character, meaning any character that
      is not a letter, digit, or whitespace (underscore counts as special)

    Args:
        password: Password to validate

    Returns:
        Tuple of (is_valid, error_message or None). When invalid, the message
        describes the first requirement that failed, checked in the order
        listed above.
    """
    if len(password) < 8:
        return False, "Password must be at least 8 characters long"

    # Each rule is (pattern that must match somewhere, message if it doesn't).
    # Order matters: the first failing rule determines the message returned.
    rules = (
        (r"[A-Z]", "Password must contain at least one uppercase letter"),
        (r"[a-z]", "Password must contain at least one lowercase letter"),
        (r"\d", "Password must contain at least one digit"),
        # A fixed list of symbols wrongly rejected passwords whose only
        # special character was e.g. '-', '_', '+', '=' or '/'. Accept any
        # non-word, non-whitespace character instead; '_' is listed
        # explicitly because \w includes it.
        (r"[^\w\s]|_", "Password must contain at least one special character"),
    )
    for pattern, message in rules:
        if not re.search(pattern, password):
            return False, message

    return True, None


def sanitize_thread_id(thread_id: str) -> str:
    """Reduce a user-supplied thread ID to a safe character set

    Args:
        thread_id: Thread ID from user input

    Returns:
        The ID with everything except ASCII letters, digits, hyphens, and
        underscores removed, truncated to at most 255 characters
    """
    max_chars = 255
    disallowed = re.compile(r"[^a-zA-Z0-9\-_]")

    # Drop disallowed characters first, then cap the length of what remains
    cleaned = disallowed.sub("", thread_id)
    return cleaned[:max_chars]


def validate_content_length(content: str, max_length: int = 10000) -> tuple[bool, Optional[str]]:
    """Check that content is non-blank and within a size limit

    Args:
        content: Content to validate
        max_length: Maximum allowed length

    Returns:
        Tuple of (is_valid, error_message or None)
    """
    # Falsy values and whitespace-only text are both treated as empty
    is_blank = not content or not content.strip()
    if is_blank:
        return False, "Content cannot be empty"

    # The limit is compared against the raw (unstripped) length
    too_long = len(content) > max_length
    if too_long:
        return False, f"Content exceeds maximum length of {max_length} characters"

    return True, None