abinazebinoy committed on
Commit
bdb9114
·
1 Parent(s): cd9b358

Add file validation utility (#4)

Browse files

- Implement MIME type validation using python-magic
- Add file size limit enforcement (50MB max)
- Create FileValidationError for validation failures
- Add comprehensive unit tests with fixtures
- Use centralized logger for validation events

Security features:
- Validates actual file content, not just extension
- Prevents renamed malicious files (.exe -> .jpg)
- Enforces memory-safe size limits

Closes #4

backend/core/logger.py CHANGED
@@ -1,36 +1,24 @@
1
  """
2
  Centralized logging configuration.
3
- Why: Consistent log format, easy to change output (file vs console).
4
  """
5
  import logging
6
  import sys
7
  from backend.core.config import settings
8
 
 
9
  def setup_logger(name: str) -> logging.Logger:
10
- """
11
- Create a logger with consistent formatting.
12
-
13
- Args:
14
- name: Logger name (usually __name__ of the module)
15
-
16
- Returns:
17
- Configured logger instance
18
- """
19
  logger = logging.getLogger(name)
20
 
21
- # Set level based on DEBUG mode
22
  level = logging.DEBUG if settings.DEBUG else logging.INFO
23
  logger.setLevel(level)
24
 
25
- # Prevent duplicate handlers if already configured
26
  if logger.handlers:
27
  return logger
28
 
29
- # Console handler
30
  handler = logging.StreamHandler(sys.stdout)
31
  handler.setLevel(level)
32
 
33
- # Format: timestamp - logger_name - level - message
34
  formatter = logging.Formatter(
35
  '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
36
  datefmt='%Y-%m-%d %H:%M:%S'
 
1
  """
2
  Centralized logging configuration.
 
3
  """
4
  import logging
5
  import sys
6
  from backend.core.config import settings
7
 
8
+
9
  def setup_logger(name: str) -> logging.Logger:
10
+ """Create logger with consistent formatting."""
 
 
 
 
 
 
 
 
11
  logger = logging.getLogger(name)
12
 
 
13
  level = logging.DEBUG if settings.DEBUG else logging.INFO
14
  logger.setLevel(level)
15
 
 
16
  if logger.handlers:
17
  return logger
18
 
 
19
  handler = logging.StreamHandler(sys.stdout)
20
  handler.setLevel(level)
21
 
 
22
  formatter = logging.Formatter(
23
  '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
24
  datefmt='%Y-%m-%d %H:%M:%S'
backend/tests/test_validators.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for file validation utilities.
3
+ """
4
+ import pytest
5
+ from backend.utils.validators import (
6
+ validate_file_type,
7
+ validate_file_size,
8
+ validate_file,
9
+ FileValidationError
10
+ )
11
+
12
+
13
def test_validate_file_type_valid_png(sample_image_bytes):
    """validate_file_type reports image/png and 'png' for the sample fixture.

    NOTE(review): assumes the ``sample_image_bytes`` fixture holds real PNG
    data -- confirm against the fixture definition.
    """
    detected_mime, ext = validate_file_type(sample_image_bytes, "test.png")

    assert (detected_mime, ext) == ("image/png", "png")
17
+
18
+
19
def test_validate_file_type_invalid():
    """Plain-text bytes carrying a .jpg name must raise FileValidationError.

    NOTE(review): relies on the MIME type detected for plain text not being
    in any configured allow-list -- confirm against settings.
    """
    not_an_image = b"This is not an image"

    with pytest.raises(FileValidationError):
        validate_file_type(not_an_image, "fake.jpg")
24
+
25
+
26
def test_validate_file_size_within_limit(sample_image_bytes):
    """validate_file_size returns the exact byte length of an accepted file."""
    expected_size = len(sample_image_bytes)

    assert validate_file_size(sample_image_bytes, "small.png") == expected_size
29
+
30
+
31
def test_validate_file_size_exceeds_limit():
    """A 60 MiB payload must be rejected with FileValidationError.

    NOTE(review): assumes settings.MAX_FILE_SIZE_MB is below 60 (the commit
    message states a 50MB limit) -- confirm against settings.
    """
    sixty_mib = 60 * 1024 * 1024
    oversized_payload = b"x" * sixty_mib

    with pytest.raises(FileValidationError):
        validate_file_size(oversized_payload, "huge.bin")
36
+
37
+
38
def test_validate_file_complete(sample_image_bytes):
    """validate_file returns a fully populated report for a valid sample file."""
    report = validate_file(sample_image_bytes, "test.png")

    assert report["valid"] is True

    # Exact-match fields of the report.
    expected_fields = {
        "mime_type": "image/png",
        "extension": "png",
        "filename": "test.png",
    }
    for key, expected_value in expected_fields.items():
        assert report[key] == expected_value

    # Size fields: non-empty file that rounds to under 0.01 MB.
    assert report["size_bytes"] > 0
    assert report["size_mb"] < 0.01
47
+
backend/utils/validators.py CHANGED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File validation utilities for secure file processing.
3
+ Why: Prevent malicious files, enforce size limits, validate MIME types.
4
+ """
5
+ import magic
6
+ from typing import Tuple, Optional
7
+ from backend.core.config import settings
8
+ from backend.core.logger import setup_logger
9
+
10
+ logger = setup_logger(__name__)
11
+
12
+
13
class FileValidationError(Exception):
    """Raised when a file fails type or size validation."""
16
+
17
+
18
def validate_file_type(file_bytes: bytes, filename: str) -> Tuple[str, str]:
    """
    Check a file's content-derived MIME type against the configured allow-list.

    The MIME type is detected from the bytes themselves with python-magic, so
    the filename's extension has no influence on whether the file is accepted
    (e.g. an .exe renamed to .jpg is judged by its content).

    Args:
        file_bytes: Raw file content to inspect.
        filename: Original filename; used in the log line and to derive the
            returned extension.

    Returns:
        Tuple of (mime_type, file_extension). The extension is the lower-cased
        text after the last '.' in ``filename``, or '' when there is no dot.
        It is not cross-checked against the detected MIME type.

    Raises:
        FileValidationError: If the detected MIME type is not one of the
            image, video or document types listed in settings.
    """
    # Detect from content, never from the name.
    sniffer = magic.Magic(mime=True)
    detected_mime = sniffer.from_buffer(file_bytes)
    logger.info(f"File '{filename}' detected as {detected_mime}")

    allowed_types = (
        settings.ALLOWED_IMAGE_TYPES
        + settings.ALLOWED_VIDEO_TYPES
        + settings.ALLOWED_DOC_TYPES
    )

    if detected_mime in allowed_types:
        # rpartition yields an empty separator when the name has no dot.
        _stem, dot, suffix = filename.rpartition('.')
        return detected_mime, (suffix.lower() if dot else '')

    raise FileValidationError(
        f"File type '{detected_mime}' not allowed. "
        f"Allowed: {', '.join(allowed_types)}"
    )
59
+
60
+
61
def validate_file_size(file_bytes: bytes, filename: str) -> int:
    """
    Reject content whose size exceeds settings.MAX_FILE_SIZE_MB.

    Size limits guard against oversized uploads and keep in-memory
    processing within bounds.

    Args:
        file_bytes: Raw file content.
        filename: Original filename (only used in the log line).

    Returns:
        File size in bytes.

    Raises:
        FileValidationError: If the size in MB (1 MB = 1024 * 1024 bytes) is
            strictly greater than the configured limit.
    """
    bytes_per_mb = 1024 * 1024

    size_bytes = len(file_bytes)
    size_mb = size_bytes / bytes_per_mb
    max_size_mb = settings.MAX_FILE_SIZE_MB

    logger.info(f"File '{filename}' size: {size_mb:.2f} MB")

    # A file exactly at the limit is accepted; only strictly larger ones fail.
    if size_mb > max_size_mb:
        message = f"File size ({size_mb:.2f} MB) exceeds limit ({max_size_mb} MB)"
        raise FileValidationError(message)

    return size_bytes
92
+
93
+
94
def validate_file(file_bytes: bytes, filename: str) -> dict:
    """
    Complete file validation (size + type).

    The size check runs first: it is an O(1) length comparison, so oversized
    payloads are rejected before the buffer is handed to libmagic for content
    sniffing. When a file is both too large and of a disallowed type, the
    size error is the one raised.

    Args:
        file_bytes: Raw file content
        filename: Original filename

    Returns:
        dict with validation results:
        {
            "valid": True,
            "mime_type": "image/jpeg",
            "extension": "jpg",
            "size_bytes": 1024000,
            "size_mb": 0.98,
            "filename": "photo.jpg"
        }

    Raises:
        FileValidationError: If either the size or the type check fails
    """
    # Cheap check first, so oversized input never reaches content sniffing.
    size_bytes = validate_file_size(file_bytes, filename)

    # Content-based MIME validation.
    mime_type, extension = validate_file_type(file_bytes, filename)

    return {
        "valid": True,
        "mime_type": mime_type,
        "extension": extension,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / (1024 * 1024), 2),
        "filename": filename,
    }