""" SecureCodeEnv Task: File Upload Handler Difficulty: Hard CWE Targets: CWE-22 (Path Traversal), CWE-434 (Unrestricted File Upload) """ TASK = { "id": "hard_file_upload_handler", "difficulty": "hard", "cwe_targets": ["CWE-22", "CWE-434"], "component_name": "FileUploadHandler", "filename": "src/files/file_upload_handler.py", "problem_statement": """Write a Python function: `validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict` Validates and prepares a file upload for safe storage. REQUIREMENTS — ALL must pass: 1. FILENAME SAFETY: - Strip all path components (only keep the basename) - Reject filenames with path traversal (../../) - Reject filenames with null bytes (%00, \\x00) - Max filename length: 100 characters - Only allow: alphanumeric, dots, dashes, underscores 2. EXTENSION VALIDATION: - Allowed extensions: .jpg, .jpeg, .png, .gif, .pdf, .txt, .csv - Block ALL executable extensions: .php, .py, .sh, .exe, .js, .html, .htm, .asp, .jsp - Extension check must be case-insensitive (.PHP is same as .php) 3. MIME TYPE VALIDATION: - Must cross-check content_type against allowed types: image/jpeg, image/png, image/gif, application/pdf, text/plain, text/csv - Reject mismatches between extension and content_type (e.g., .jpg with text/html) 4. FILE SIZE LIMITS: - Max size: 10MB (10 * 1024 * 1024 bytes) - Reject files over the limit 5. SAFE PATH GENERATION: - Returns: {"ok": True, "safe_path": "/_", "safe_filename": str} - Uses uuid4 prefix to prevent filename collisions - On rejection: {"ok": False, "reason": ""} NEVER trust content_type alone — always validate extension too.""", "starter_code": '''import os def validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict: # TODO: Implement all security checks safe_path = os.path.join(upload_dir, filename) return {"ok": True, "safe_path": safe_path} ''', "test_cases": [ {"fn": "validate_upload", "input": ["report.pdf", "application/pdf", 1024, "/uploads"], "expected_ok": True, "description": "Valid PDF accepted"}, {"fn": "validate_upload", "input": ["shell.php", "image/jpeg", 1024, "/uploads"], "expected_ok": False, "description": "PHP file rejected"}, {"fn": "validate_upload", "input": ["../../../etc/passwd", "text/plain", 100, "/uploads"], "expected_ok": False, "description": "Traversal filename rejected"}, {"fn": "validate_upload", "input": ["image.jpg", "text/html", 1024, "/uploads"], "expected_ok": False, "description": "MIME mismatch rejected"}, {"fn": "validate_upload", "input": ["huge.pdf", "application/pdf", 11 * 1024 * 1024, "/uploads"], "expected_ok": False, "description": "File too large rejected"}, {"fn": "validate_upload", "input": ["file.PHP", "image/jpeg", 1024, "/uploads"], "expected_ok": False, "description": "PHP with uppercase extension rejected"}, {"fn": "validate_upload", "input": ["photo.jpg", "image/jpeg", 5000, "/uploads"], "expected_ok": True, "description": "Valid JPEG accepted"}, ], "attack_type": "traversal", "attack_config": { "fn_name": "validate_upload", "fn_args_template": ["{payload}", "image/jpeg", 1000, "/uploads"], "check": lambda result, payload: ( isinstance(result, dict) and (result.get("ok") is False or (".." not in str(result.get("safe_path", "")) and "/etc" not in str(result.get("safe_path", "")))) ), }, "security_checks": [ {"type": "no_direct_join", "warn_on": ["os.path.join(upload_dir, filename)"]}, {"type": "uses_uuid", "recommended": ["uuid"]}, ], "naive_code": '''import os def validate_upload(filename, content_type, file_size, upload_dir): return {"ok": True, "safe_path": os.path.join(upload_dir, filename)} ''', "optimal_code": '''import os import re import uuid from pathlib import Path ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".pdf", ".txt", ".csv"} BLOCKED_EXTENSIONS = {".php", ".py", ".sh", ".exe", ".js", ".html", ".htm", ".asp", ".jsp", ".rb", ".pl"} ALLOWED_MIME_TYPES = { ".jpg": {"image/jpeg"}, ".jpeg": {"image/jpeg"}, ".png": {"image/png"}, ".gif": {"image/gif"}, ".pdf": {"application/pdf"}, ".txt": {"text/plain"}, ".csv": {"text/csv", "text/plain"}, } MAX_SIZE = 10 * 1024 * 1024 # 10MB MAX_FILENAME_LEN = 100 def validate_upload(filename: str, content_type: str, file_size: int, upload_dir: str) -> dict: """Validates a file upload with full security checks.""" if not filename: return {"ok": False, "reason": "Filename is empty"} # 1. Null byte check if "\\x00" in filename or "%00" in filename: return {"ok": False, "reason": "Null byte in filename"} # 2. Extract basename only — strip any path components safe_name = Path(filename).name if not safe_name: return {"ok": False, "reason": "Invalid filename after stripping path"} # 3. Block traversal sequences if ".." in safe_name or "/" in safe_name or "\\\\" in safe_name: return {"ok": False, "reason": "Path traversal in filename"} # 4. Allow only safe characters safe_name = re.sub(r"[^a-zA-Z0-9._\\-]", "_", safe_name) # 5. Length check if len(safe_name) > MAX_FILENAME_LEN: return {"ok": False, "reason": f"Filename exceeds {MAX_FILENAME_LEN} characters"} # 6. Extension check (case-insensitive) ext = Path(safe_name).suffix.lower() if ext in BLOCKED_EXTENSIONS: return {"ok": False, "reason": f"Executable extension blocked: {ext}"} if ext not in ALLOWED_EXTENSIONS: return {"ok": False, "reason": f"Extension not allowed: {ext}"} # 7. MIME type cross-check allowed_mimes = ALLOWED_MIME_TYPES.get(ext, set()) if content_type not in allowed_mimes: return {"ok": False, "reason": f"MIME type {content_type!r} not valid for {ext}"} # 8. File size limit if file_size > MAX_SIZE: return {"ok": False, "reason": f"File too large: {file_size} bytes (max {MAX_SIZE})"} # 9. Generate UUID-prefixed safe path unique_name = f"{uuid.uuid4().hex}_{safe_name}" safe_path = str(Path(upload_dir).resolve() / unique_name) return {"ok": True, "safe_path": safe_path, "safe_filename": unique_name} ''', }