# File size: 6,006 Bytes

# 54c5666

"""Security validation for inputs and configs"""
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional

# Module-level logger named after this module; the validators below report through it.
logger = logging.getLogger(__name__)


class SecurityValidator:
    """Namespace for heuristic security checks on untrusted text."""

    # Regular expressions whose presence in a string marks it as suspicious.
    DANGEROUS_PATTERNS = [
        r'__import__',
        r'eval\s*\(',
        r'exec\s*\(',
        r'compile\s*\(',
        r'os\.system',
        r'subprocess',
        r'open\s*\(',
    ]

    @staticmethod
    def check_code_injection(text: str) -> bool:
        """
        Scan text for any entry of DANGEROUS_PATTERNS.

        Matching is case-insensitive and stops at the first pattern that
        hits; that pattern is reported through the module logger at
        WARNING level.

        Args:
            text: String to check

        Returns:
            True if no pattern matched, False if one did
        """
        first_hit = next(
            (
                candidate
                for candidate in SecurityValidator.DANGEROUS_PATTERNS
                if re.search(candidate, text, re.IGNORECASE)
            ),
            None,
        )
        if first_hit is None:
            return True

        logger.warning(f"Dangerous pattern detected: {first_hit}")
        return False


def validate_model_path(path: str, allowed_dirs: Optional[List[str]] = None) -> bool:
    """
    Validate a model path to prevent directory traversal.

    The checks run in this order: the path must be resolvable, must not
    contain a ``..`` component, must not contain shell metacharacters, and,
    when ``allowed_dirs`` is given, must resolve to a location inside one of
    those directories. The path is not required to exist.

    Args:
        path: Path to validate
        allowed_dirs: List of allowed base directories. ``None`` or an empty
            list disables the containment check.

    Returns:
        True if every check passes (failures are reported by raising,
        never by returning False)

    Raises:
        ValueError: If the path cannot be resolved, contains a ``..``
            component or a suspicious character, or lies outside
            ``allowed_dirs``
    """
    # Resolve to absolute path
    try:
        raw_path = Path(path)
        abs_path = raw_path.resolve()
    except (TypeError, ValueError, OSError, RuntimeError) as e:
        raise ValueError(f"Invalid path: {path}") from e

    # Check for directory traversal on the path as supplied. resolve() has
    # already collapsed every ".." component, so looking at abs_path would
    # never catch traversal and would wrongly reject ordinary names that
    # merely contain two dots (e.g. "weights..v2.bin").
    if '..' in raw_path.parts:
        raise ValueError("Directory traversal detected in path")

    # Check if path contains shell metacharacters
    suspicious_chars = ('|', ';', '&', '$', '`')
    if any(char in str(path) for char in suspicious_chars):
        raise ValueError(f"Suspicious characters in path: {path}")

    # Check allowed directories if specified; compare resolved paths so
    # symlinks and relative segments cannot escape the allowed bases.
    if allowed_dirs:
        allowed_dirs_resolved = [Path(d).resolve() for d in allowed_dirs]
        if not any(abs_path.is_relative_to(allowed_dir) for allowed_dir in allowed_dirs_resolved):
            raise ValueError(f"Path not in allowed directories: {path}")

    # Existence is deliberately not checked so callers can validate paths
    # for files that are about to be created.

    logger.debug(f"Path validated: {abs_path}")
    return True


def sanitize_config(config: dict, max_depth: int = 10) -> dict:
    """
    Validate a configuration dictionary against code-injection heuristics.

    Every key (at any nesting level) is rejected if its lower-cased string
    form contains one of the dangerous names, and every string value is
    passed through SecurityValidator.check_code_injection. Dictionaries
    and sequences (list, tuple, set, frozenset) are walked recursively, so
    values hidden inside nested sequences are inspected as well.

    Nothing is rewritten: on success a shallow copy of ``config`` is
    returned, so nested containers are shared with the input.

    Args:
        config: Configuration dictionary
        max_depth: Maximum nesting depth to check. Each nested dict adds
            one level; a sequence nested directly inside another sequence
            also adds one level.

    Returns:
        Shallow copy of the validated configuration

    Raises:
        ValueError: If a dangerous key or value is found, or the
            configuration is nested deeper than ``max_depth``
    """
    # NOTE(review): keys are matched by substring, so names such as
    # "input_size" or "eval_steps" are rejected too - confirm this is intended.
    dangerous_keys = ('__import__', 'eval', 'exec', 'compile', 'open', 'input')
    sequence_types = (list, tuple, set, frozenset)

    def check_depth(depth: int) -> None:
        """Abort when the walk goes deeper than max_depth."""
        if depth > max_depth:
            raise ValueError(f"Configuration nesting too deep: {depth}")

    def check_dict(d: Dict[str, Any], depth: int = 0) -> None:
        """Recursively check one dictionary level."""
        check_depth(depth)

        for key, value in d.items():
            # Check key names
            key_lower = str(key).lower()
            if any(danger in key_lower for danger in dangerous_keys):
                raise ValueError(f"Dangerous configuration key: {key}")

            if isinstance(value, str):
                if not SecurityValidator.check_code_injection(value):
                    raise ValueError(f"Potential code injection in config value: {key}")
            elif isinstance(value, dict):
                check_dict(value, depth + 1)
            elif isinstance(value, sequence_types):
                check_sequence(value, key, depth)

    def check_sequence(items, key, depth: int) -> None:
        """Recursively check the items of a sequence stored under ``key``."""
        check_depth(depth)

        for item in items:
            if isinstance(item, dict):
                check_dict(item, depth + 1)
            elif isinstance(item, str):
                if not SecurityValidator.check_code_injection(item):
                    raise ValueError(f"Potential code injection in config list: {key}")
            elif isinstance(item, sequence_types):
                # Sequences inside sequences used to be skipped entirely,
                # which let nested payloads bypass validation.
                check_sequence(item, key, depth + 1)

    # Create a copy and validate
    sanitized = config.copy()
    check_dict(sanitized)

    logger.debug("Configuration sanitized successfully")
    return sanitized


def validate_file_size(file_path: str, max_size_mb: int = 1000) -> bool:
    """
    Check that a file on disk does not exceed a size limit.

    Args:
        file_path: Path to file
        max_size_mb: Maximum file size in MB (1 MB = 1024 * 1024 bytes)

    Returns:
        True if file size is acceptable

    Raises:
        FileNotFoundError: If nothing exists at file_path
        ValueError: If file is too large
    """
    bytes_per_mb = 1024 * 1024
    target = Path(file_path)

    if target.exists():
        size_mb = target.stat().st_size / bytes_per_mb
        if size_mb > max_size_mb:
            raise ValueError(
                f"File too large: {size_mb:.2f}MB (max: {max_size_mb}MB)"
            )
        return True

    raise FileNotFoundError(f"File not found: {file_path}")


def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename to prevent path traversal.

    The directory part is dropped, ``..`` sequences and shell/path
    metacharacters are replaced with ``_``, and the result is limited to
    255 characters while keeping the extension where possible. A result
    that would be empty or ``.`` (both of which resolve to a directory
    when joined to a base path) is replaced with ``_``.

    Args:
        filename: Original filename

    Returns:
        Sanitized filename: never empty, never ``.``, at most 255
        characters long
    """
    # Remove path separators
    filename = os.path.basename(filename)

    # Remove dangerous characters. The two-character '..' sequence is
    # handled first; the single characters are then mapped in one pass.
    filename = filename.replace('..', '_')
    dangerous_chars = '/\\\0|;&$`<>'
    filename = filename.translate(
        str.maketrans(dangerous_chars, '_' * len(dangerous_chars))
    )

    # Limit length (counted in characters, not encoded bytes)
    max_length = 255
    if len(filename) > max_length:
        name, ext = os.path.splitext(filename)
        if len(ext) <= max_length:
            filename = name[:max_length - len(ext)] + ext
        else:
            # The extension alone is over the limit; a negative slice bound
            # here would leave the result too long, so cut the whole name.
            filename = filename[:max_length]

    # An empty name or "." would point at the containing directory itself.
    if filename in ('', '.'):
        filename = '_'

    return filename