File size: 3,111 Bytes
bdb9114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f3601a
 
bdb9114
4f3601a
 
 
 
bdb9114
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
File validation utilities for secure file processing.
Why: Prevent malicious files, enforce size limits, validate MIME types.
"""
import magic
from typing import Tuple, Optional
from backend.core.config import settings
from backend.core.logger import setup_logger

# Module-level logger shared by every validator below; created through the
# project's setup_logger helper and named after this module.
logger = setup_logger(__name__)


class FileValidationError(Exception):
    """Signals that a file was rejected by one of the validation checks.

    Carries no extra state: the human-readable reason is the exception
    message passed by the code that raises it.
    """


def validate_file_type(file_bytes: bytes, filename: str) -> Tuple[str, str]:
    """
    Validate file MIME type using python-magic (reads file signature).

    Why python-magic?
    - Reads actual file headers, not just extension
    - Prevents .exe renamed to .jpg attacks

    The allow-list is the concatenation of settings.ALLOWED_IMAGE_TYPES,
    settings.ALLOWED_VIDEO_TYPES and settings.ALLOWED_DOC_TYPES.

    Args:
        file_bytes: Raw file content
        filename: Original filename. Used only for logging and to derive
            the returned extension; the accept/reject decision is based
            solely on the detected MIME type.

    Returns:
        Tuple of (mime_type, file_extension). The extension is the
        lower-cased text after the last '.' in the final path component
        of ``filename``, or '' when that component contains no '.'.

    Raises:
        FileValidationError: If the detected MIME type is not allowed
    """
    # Get MIME type from file content (not extension)
    mime = magic.Magic(mime=True)
    detected_mime = mime.from_buffer(file_bytes)

    # filename is client-supplied: log its repr so embedded newlines or
    # control characters cannot forge additional log lines.
    logger.info(f"File {filename!r} detected as {detected_mime}")

    # Combine all allowed types
    allowed_types = (
        settings.ALLOWED_IMAGE_TYPES +
        settings.ALLOWED_VIDEO_TYPES +
        settings.ALLOWED_DOC_TYPES
    )

    if detected_mime not in allowed_types:
        raise FileValidationError(
            f"File type '{detected_mime}' not allowed. "
            f"Allowed: {', '.join(allowed_types)}"
        )

    # Extract the extension from the last path component only; otherwise a
    # dotted directory such as "v1.2/report" would yield "2/report".
    # Both '/' and '\\' are treated as separators because the name may come
    # from a client on any platform.
    basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
    _, dot, suffix = basename.rpartition(".")
    extension = suffix.lower() if dot else ""

    return detected_mime, extension


def validate_file_size(file_bytes: bytes, filename: str) -> int:
    """
    Validate file size against limit.

    Why size limits?
    - Prevent DoS attacks (100GB upload)
    - Memory constraints (in-memory processing)
    - Reasonable for forensic analysis

    The limit is read from settings.MAX_FILE_SIZE_MB and compared in
    mebibytes (1 MB = 1024 * 1024 bytes). A file exactly at the limit
    is accepted.

    Args:
        file_bytes: Raw file content
        filename: Original filename (for logging only)

    Returns:
        File size in bytes

    Raises:
        FileValidationError: If file exceeds limit
    """
    size_bytes = len(file_bytes)
    size_mb = size_bytes / (1024 * 1024)
    max_size_mb = settings.MAX_FILE_SIZE_MB

    # filename is client-supplied: log its repr so embedded newlines or
    # control characters cannot forge additional log lines.
    logger.info(f"File {filename!r} size: {size_mb:.2f} MB")

    if size_mb > max_size_mb:
        raise FileValidationError(
            f"File size ({size_mb:.2f} MB) exceeds limit ({max_size_mb} MB)"
        )

    return size_bytes


def validate_file(file_bytes: bytes, filename: str) -> dict:
    """
    Run every validation step on a file and summarize the outcome.

    The size check runs before the type check, so an oversized payload is
    rejected before any content sniffing takes place.

    Args:
        file_bytes: Raw file content
        filename: Original filename

    Returns:
        Dict with the keys "valid" (always True on return), "mime_type",
        "extension", "size_bytes", "size_mb" (rounded to 2 decimals) and
        "filename".

    Raises:
        Any exception raised by validate_file_size or validate_file_type
        propagates unchanged; nothing is caught here.
    """
    bytes_per_mb = 1024 * 1024

    # Step 1: size (cheap, fails fast)
    byte_count = validate_file_size(file_bytes, filename)

    # Step 2: content type
    detected_type, ext = validate_file_type(file_bytes, filename)

    report = dict(
        valid=True,
        mime_type=detected_type,
        extension=ext,
        size_bytes=byte_count,
        size_mb=round(byte_count / bytes_per_mb, 2),
        filename=filename,
    )
    return report