Spaces:
Running
Running
File size: 3,111 Bytes
"""
File validation utilities for secure file processing.
Why: Prevent malicious files, enforce size limits, validate MIME types.
"""
import magic
from typing import Tuple, Optional
from backend.core.config import settings
from backend.core.logger import setup_logger
logger = setup_logger(__name__)
class FileValidationError(Exception):
    """Signal that an uploaded file was rejected by one of the validators.

    Raised by the validators in this module when a file's detected MIME type
    is not allowed or its size exceeds the configured limit. The message
    describes the reason for the rejection.
    """
def validate_file_type(file_bytes: bytes, filename: str) -> Tuple[str, str]:
    """
    Validate a file's MIME type from its content and report its extension.

    Why python-magic?
    - Reads actual file headers, not just the extension
    - Prevents .exe renamed to .jpg attacks

    Args:
        file_bytes: Raw file content.
        filename: Original filename. Used for logging and to derive the
            returned extension; it plays no part in the allow-list decision.

    Returns:
        Tuple of (mime_type, file_extension). The extension is lower-cased,
        carries no leading dot, and is '' when the final path component of
        ``filename`` contains no dot.

    Raises:
        FileValidationError: If the detected MIME type is not in any of the
            configured allow-lists.
    """
    # Get MIME type from file content (not extension)
    detected_mime = magic.Magic(mime=True).from_buffer(file_bytes)
    logger.info(f"File '{filename}' detected as {detected_mime}")

    # Combine all allowed types
    allowed_types = (
        settings.ALLOWED_IMAGE_TYPES +
        settings.ALLOWED_VIDEO_TYPES +
        settings.ALLOWED_DOC_TYPES
    )
    if detected_mime not in allowed_types:
        raise FileValidationError(
            f"File type '{detected_mime}' not allowed. "
            f"Allowed: {', '.join(allowed_types)}"
        )

    # Client-supplied names may carry directory parts ("my.dir/report" or
    # "C:\\up.loads\\report"). Look only at the last path component so a dot
    # in a directory name is never mistaken for the extension separator.
    basename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    _, dot, suffix = basename.rpartition('.')
    extension = suffix.lower() if dot else ''
    return detected_mime, extension
def validate_file_size(file_bytes: bytes, filename: str) -> int:
    """
    Check that the file content does not exceed the configured size limit.

    The limit is read from ``settings.MAX_FILE_SIZE_MB`` and compared against
    the content length expressed in mebibytes (bytes / 1024 / 1024).

    Args:
        file_bytes: Raw file content; its length is the size being checked.
        filename: Original filename, used only in the log message.

    Returns:
        The file size in bytes.

    Raises:
        FileValidationError: If the size in MB is greater than the limit.
    """
    byte_count = len(file_bytes)
    megabytes = byte_count / (1024 * 1024)
    limit_mb = settings.MAX_FILE_SIZE_MB

    logger.info(f"File '{filename}' size: {megabytes:.2f} MB")

    if megabytes > limit_mb:
        message = f"File size ({megabytes:.2f} MB) exceeds limit ({limit_mb} MB)"
        raise FileValidationError(message)

    return byte_count
def validate_file(file_bytes: bytes, filename: str) -> dict:
    """
    Run the full validation pipeline (size, then type) on one file.

    The size check runs first so an oversized payload is rejected before any
    content sniffing is attempted.

    Args:
        file_bytes: Raw file content.
        filename: Original filename, passed through to both validators.

    Returns:
        A dict with the keys ``valid`` (always True on return),
        ``mime_type``, ``extension``, ``size_bytes``, ``size_mb`` (rounded to
        two decimals) and ``filename``.

    Raises:
        FileValidationError: Propagated from ``validate_file_size`` or
            ``validate_file_type`` when either check fails.
    """
    byte_count = validate_file_size(file_bytes, filename)
    detected_mime, file_ext = validate_file_type(file_bytes, filename)

    report = {"valid": True}
    report["mime_type"] = detected_mime
    report["extension"] = file_ext
    report["size_bytes"] = byte_count
    report["size_mb"] = round(byte_count / (1024 * 1024), 2)
    report["filename"] = filename
    return report
|