SecureCodeEnv / tasks /hard /file_upload_handler.py
vishaldhakad's picture
frontend adding
7257069
Raw
History Blame Contribute Delete
6.47 kB
"""
SecureCodeEnv Task: File Upload Handler
Difficulty: Hard
CWE Targets: CWE-22 (Path Traversal), CWE-434 (Unrestricted File Upload)
"""
TASK = {
"id": "hard_file_upload_handler",
"difficulty": "hard",
"cwe_targets": ["CWE-22", "CWE-434"],
"component_name": "FileUploadHandler",
"filename": "src/files/file_upload_handler.py",
"problem_statement": """Write a Python function:
`validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict`
Validates and prepares a file upload for safe storage.
REQUIREMENTS — ALL must pass:
1. FILENAME SAFETY:
- Strip all path components (only keep the basename)
- Reject filenames with path traversal (../../)
- Reject filenames with null bytes (%00, \\x00)
- Max filename length: 100 characters
- Only allow: alphanumeric, dots, dashes, underscores
2. EXTENSION VALIDATION:
- Allowed extensions: .jpg, .jpeg, .png, .gif, .pdf, .txt, .csv
- Block ALL executable extensions: .php, .py, .sh, .exe, .js, .html, .htm, .asp, .jsp
- Extension check must be case-insensitive (.PHP is same as .php)
3. MIME TYPE VALIDATION:
- Must cross-check content_type against allowed types:
image/jpeg, image/png, image/gif, application/pdf, text/plain, text/csv
- Reject mismatches between extension and content_type (e.g., .jpg with text/html)
4. FILE SIZE LIMITS:
- Max size: 10MB (10 * 1024 * 1024 bytes)
- Reject files over the limit
5. SAFE PATH GENERATION:
- Returns: {"ok": True, "safe_path": "<upload_dir>/<uuid>_<safe_filename>", "safe_filename": str}
- Uses uuid4 prefix to prevent filename collisions
- On rejection: {"ok": False, "reason": "<specific reason>"}
NEVER trust content_type alone — always validate extension too.""",
"starter_code": '''import os
def validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict:
# TODO: Implement all security checks
safe_path = os.path.join(upload_dir, filename)
return {"ok": True, "safe_path": safe_path}
''',
"test_cases": [
{"fn": "validate_upload", "input": ["report.pdf", "application/pdf", 1024, "/uploads"], "expected_ok": True, "description": "Valid PDF accepted"},
{"fn": "validate_upload", "input": ["shell.php", "image/jpeg", 1024, "/uploads"], "expected_ok": False, "description": "PHP file rejected"},
{"fn": "validate_upload", "input": ["../../../etc/passwd", "text/plain", 100, "/uploads"], "expected_ok": False, "description": "Traversal filename rejected"},
{"fn": "validate_upload", "input": ["image.jpg", "text/html", 1024, "/uploads"], "expected_ok": False, "description": "MIME mismatch rejected"},
{"fn": "validate_upload", "input": ["huge.pdf", "application/pdf", 11 * 1024 * 1024, "/uploads"], "expected_ok": False, "description": "File too large rejected"},
{"fn": "validate_upload", "input": ["file.PHP", "image/jpeg", 1024, "/uploads"], "expected_ok": False, "description": "PHP with uppercase extension rejected"},
{"fn": "validate_upload", "input": ["photo.jpg", "image/jpeg", 5000, "/uploads"], "expected_ok": True, "description": "Valid JPEG accepted"},
],
"attack_type": "traversal",
"attack_config": {
"fn_name": "validate_upload",
"fn_args_template": ["{payload}", "image/jpeg", 1000, "/uploads"],
"check": lambda result, payload: (
isinstance(result, dict) and
(result.get("ok") is False or
(".." not in str(result.get("safe_path", "")) and
"/etc" not in str(result.get("safe_path", ""))))
),
},
"security_checks": [
{"type": "no_direct_join", "warn_on": ["os.path.join(upload_dir, filename)"]},
{"type": "uses_uuid", "recommended": ["uuid"]},
],
"naive_code": '''import os
def validate_upload(filename, content_type, file_size, upload_dir):
return {"ok": True, "safe_path": os.path.join(upload_dir, filename)}
''',
"optimal_code": '''import os
import re
import uuid
from pathlib import Path
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".pdf", ".txt", ".csv"}
BLOCKED_EXTENSIONS = {".php", ".py", ".sh", ".exe", ".js", ".html", ".htm", ".asp", ".jsp", ".rb", ".pl"}
ALLOWED_MIME_TYPES = {
".jpg": {"image/jpeg"}, ".jpeg": {"image/jpeg"},
".png": {"image/png"}, ".gif": {"image/gif"},
".pdf": {"application/pdf"},
".txt": {"text/plain"}, ".csv": {"text/csv", "text/plain"},
}
MAX_SIZE = 10 * 1024 * 1024 # 10MB
MAX_FILENAME_LEN = 100
def validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict:
"""Validates a file upload with full security checks."""
if not filename:
return {"ok": False, "reason": "Filename is empty"}
# 1. Null byte check
if "\\x00" in filename or "%00" in filename:
return {"ok": False, "reason": "Null byte in filename"}
# 2. Extract basename only — strip any path components
safe_name = Path(filename).name
if not safe_name:
return {"ok": False, "reason": "Invalid filename after stripping path"}
# 3. Block traversal sequences
if ".." in safe_name or "/" in safe_name or "\\\\" in safe_name:
return {"ok": False, "reason": "Path traversal in filename"}
# 4. Allow only safe characters
safe_name = re.sub(r"[^a-zA-Z0-9._\\-]", "_", safe_name)
# 5. Length check
if len(safe_name) > MAX_FILENAME_LEN:
return {"ok": False, "reason": f"Filename exceeds {MAX_FILENAME_LEN} characters"}
# 6. Extension check (case-insensitive)
ext = Path(safe_name).suffix.lower()
if ext in BLOCKED_EXTENSIONS:
return {"ok": False, "reason": f"Executable extension blocked: {ext}"}
if ext not in ALLOWED_EXTENSIONS:
return {"ok": False, "reason": f"Extension not allowed: {ext}"}
# 7. MIME type cross-check
allowed_mimes = ALLOWED_MIME_TYPES.get(ext, set())
if content_type not in allowed_mimes:
return {"ok": False, "reason": f"MIME type {content_type!r} not valid for {ext}"}
# 8. File size limit
if file_size > MAX_SIZE:
return {"ok": False, "reason": f"File too large: {file_size} bytes (max {MAX_SIZE})"}
# 9. Generate UUID-prefixed safe path
unique_name = f"{uuid.uuid4().hex}_{safe_name}"
safe_path = str(Path(upload_dir).resolve() / unique_name)
return {"ok": True, "safe_path": safe_path, "safe_filename": unique_name}
''',
}