# utils/validation.py - File validation for PDF Analysis & Orchestrator
import logging
import os
from pathlib import Path
# Upload size limit in MB. Overridable through the ANALYSIS_MAX_UPLOAD_MB
# environment variable; defaults to 50 when the variable is not set.
MAX_MB = int(os.environ.get("ANALYSIS_MAX_UPLOAD_MB", "50"))
def _get_size_bytes_from_uploaded(uploaded) -> int:
    """Return the size of an uploaded file in bytes, or 0 if it cannot be determined.

    Args:
        uploaded: One of
            * a filesystem path (``str`` or any ``os.PathLike`` such as
              ``pathlib.Path``),
            * a dict whose ``"name"`` entry holds such a path,
            * a file-like object exposing both ``seek`` and ``tell``.

    Returns:
        The size in bytes. ``0`` is returned both for a genuinely empty file
        and when the size is unknown (unsupported input type, path that does
        not exist, or any error raised while probing), so callers cannot
        distinguish those cases from the return value alone.
    """
    try:
        # A dict wrapper carries the path under "name"; otherwise the object
        # itself is the path candidate.
        candidate = uploaded.get("name") if isinstance(uploaded, dict) else uploaded
        if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
            return Path(candidate).stat().st_size

        if hasattr(uploaded, "seek") and hasattr(uploaded, "tell"):
            position = uploaded.tell()
            try:
                uploaded.seek(0, os.SEEK_END)
                return uploaded.tell()
            finally:
                # Always put the cursor back, even if measuring failed, so the
                # caller can still read the upload from where it left off.
                uploaded.seek(position)
    except Exception:
        # Sizing is deliberately best-effort on arbitrary duck-typed objects:
        # record the failure for diagnosis and fall through to "unknown".
        logging.getLogger(__name__).debug(
            "Could not determine size of uploaded object of type %s",
            type(uploaded).__name__,
            exc_info=True,
        )
    # Unknown size -> report 0 and let the caller decide how to treat it.
    return 0
def validate_file_size(uploaded, max_mb=None):
    """Check that an uploaded file does not exceed the allowed size.

    Args:
        uploaded: The upload to check; passed unchanged to
            ``_get_size_bytes_from_uploaded``.
        max_mb: Optional size limit in MB (1 MB = 1024 * 1024 bytes). When
            ``None`` (the default) the module-level ``MAX_MB`` is used.

    Returns:
        ``True`` when the file is within the limit, or when its size is
        reported as 0 (empty or undeterminable), in which case the check is
        skipped.

    Raises:
        ValueError: If the file is larger than the limit.
    """
    limit_mb = MAX_MB if max_mb is None else max_mb
    size_bytes = _get_size_bytes_from_uploaded(uploaded)
    if size_bytes <= 0:
        # The helper reports 0 for both empty and unmeasurable uploads. The
        # check stays permissive, but the skip is logged so it is visible.
        logging.getLogger(__name__).warning(
            "Upload size is unknown or zero; skipping size validation."
        )
        return True

    size_mb = size_bytes / (1024 * 1024)
    if size_mb > limit_mb:
        raise ValueError(
            f"Uploaded file exceeds allowed size of {limit_mb} MB (size: {size_mb:.2f} MB)."
        )
    return True