# verifile-x-api/backend/utils/validators.py
# abinazebinoy: Optimize file validation order for DoS protection (4f3601a)
"""
File validation utilities for secure file processing.
Why: Prevent malicious files, enforce size limits, validate MIME types.
"""
import magic
from typing import Tuple, Optional
from backend.core.config import settings
from backend.core.logger import setup_logger
logger = setup_logger(__name__)
class FileValidationError(Exception):
    """Signals that a file was rejected by one of the validators in this module."""
def validate_file_type(file_bytes: bytes, filename: str) -> Tuple[str, str]:
    """
    Validate file MIME type using python-magic (reads file signature).

    Why python-magic?
    - Reads actual file headers, not just extension
    - Prevents .exe renamed to .jpg attacks

    Args:
        file_bytes: Raw file content
        filename: Original filename. Used for logging and to derive the
            returned extension; it does not affect the allow/deny decision.

    Returns:
        Tuple of (mime_type, file_extension). The extension is lower-cased,
        carries no leading dot, and is '' when the final path component of
        ``filename`` contains no dot.

    Raises:
        FileValidationError: If the detected MIME type is not in any of the
            allowed image/video/document lists from settings.
    """
    # Get MIME type from file content (not extension)
    mime = magic.Magic(mime=True)
    detected_mime = mime.from_buffer(file_bytes)
    logger.info(f"File '{filename}' detected as {detected_mime}")

    # Combine all allowed types
    allowed_types = (
        settings.ALLOWED_IMAGE_TYPES +
        settings.ALLOWED_VIDEO_TYPES +
        settings.ALLOWED_DOC_TYPES
    )
    if detected_mime not in allowed_types:
        raise FileValidationError(
            f"File type '{detected_mime}' not allowed. "
            f"Allowed: {', '.join(allowed_types)}"
        )

    # Only the final path component can carry an extension. Without this,
    # a dot inside a directory part ("my.dir/file") would be mistaken for the
    # extension separator and yield "dir/file". Both separator styles are
    # handled because the name is supplied by the caller and may come from
    # any platform.
    basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
    extension = basename.rsplit(".", 1)[-1].lower() if "." in basename else ""
    return detected_mime, extension
def validate_file_size(file_bytes: bytes, filename: str) -> int:
    """
    Check the in-memory file length against the configured maximum.

    Why size limits?
    - Prevent DoS attacks (100GB upload)
    - Memory constraints (in-memory processing)
    - Reasonable for forensic analysis

    Args:
        file_bytes: Raw file content
        filename: Original filename (only used in the log line)

    Returns:
        Length of ``file_bytes`` in bytes

    Raises:
        FileValidationError: If the size in MB is greater than
            ``settings.MAX_FILE_SIZE_MB``
    """
    bytes_per_mb = 1024 * 1024
    byte_count = len(file_bytes)
    megabytes = byte_count / bytes_per_mb
    limit_mb = settings.MAX_FILE_SIZE_MB

    logger.info(f"File '{filename}' size: {megabytes:.2f} MB")

    # A file exactly at the limit is accepted; only strictly larger fails.
    over_limit = megabytes > limit_mb
    if over_limit:
        raise FileValidationError(
            f"File size ({megabytes:.2f} MB) exceeds limit ({limit_mb} MB)"
        )
    return byte_count
def validate_file(file_bytes: bytes, filename: str) -> dict:
    """
    Run the size check and then the type check, and summarise the result.

    Args:
        file_bytes: Raw file content
        filename: Original filename

    Returns:
        Dict with keys ``valid`` (always True on return), ``mime_type``,
        ``extension``, ``size_bytes``, ``size_mb`` (rounded to 2 decimals)
        and ``filename``.

    Raises:
        FileValidationError: Propagated from either validator on rejection
    """
    # Size goes first: it is the cheap check, so oversized input is rejected
    # before any content sniffing happens (fail fast for DoS protection).
    byte_count = validate_file_size(file_bytes, filename)
    detected_type, ext = validate_file_type(file_bytes, filename)

    megabytes = round(byte_count / (1024 * 1024), 2)
    return dict(
        valid=True,
        mime_type=detected_type,
        extension=ext,
        size_bytes=byte_count,
        size_mb=megabytes,
        filename=filename,
    )