Merge branch 'main' into dev
Browse files- .pre-commit-config.yaml +33 -0
- BUGS-AUDIT-2024-12-12.md +208 -0
- Dockerfile +5 -1
- NEXT-CONCERNS.md +77 -0
- BUGS-AUDIT-DEEP-DIVE.md → docs/bugs/BUGS-AUDIT-DEEP-DIVE.md +0 -0
- BUGS-HF-SPACES-INTEGRATION.md → docs/bugs/BUGS-HF-SPACES-INTEGRATION.md +0 -0
- HF_SPACES_UI_BROKEN_AUDIT.md → docs/bugs/HF_SPACES_UI_BROKEN_AUDIT.md +0 -0
- frontend/README.md +1 -1
- frontend/src/components/CaseSelector.tsx +68 -15
- frontend/src/components/__tests__/CaseSelector.test.tsx +30 -1
- frontend/src/hooks/useSegmentation.ts +13 -1
- frontend/src/mocks/handlers.ts +26 -2
- src/stroke_deepisles_demo/api/config.py +16 -0
- src/stroke_deepisles_demo/api/files.py +85 -0
- src/stroke_deepisles_demo/api/job_store.py +18 -3
- src/stroke_deepisles_demo/api/main.py +11 -13
- src/stroke_deepisles_demo/api/routes.py +17 -9
- src/stroke_deepisles_demo/data/__init__.py +12 -5
.pre-commit-config.yaml
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
repos:
|
|
|
|
|
|
|
|
|
|
| 2 |
- repo: https://github.com/astral-sh/ruff-pre-commit
|
| 3 |
rev: v0.14.8
|
| 4 |
hooks:
|
|
@@ -17,6 +20,36 @@ repos:
|
|
| 17 |
# Exclude auto-generated Gradio custom component files
|
| 18 |
exclude: ^packages/niivueviewer/
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 21 |
rev: v6.0.0
|
| 22 |
hooks:
|
|
|
|
| 1 |
repos:
|
| 2 |
+
# ============================================
|
| 3 |
+
# BACKEND HOOKS (Python)
|
| 4 |
+
# ============================================
|
| 5 |
- repo: https://github.com/astral-sh/ruff-pre-commit
|
| 6 |
rev: v0.14.8
|
| 7 |
hooks:
|
|
|
|
| 20 |
# Exclude auto-generated Gradio custom component files
|
| 21 |
exclude: ^packages/niivueviewer/
|
| 22 |
|
| 23 |
+
# ============================================
|
| 24 |
+
# FRONTEND HOOKS (TypeScript/React)
|
| 25 |
+
# ============================================
|
| 26 |
+
- repo: local
|
| 27 |
+
hooks:
|
| 28 |
+
- id: frontend-lint
|
| 29 |
+
name: frontend-lint (eslint)
|
| 30 |
+
entry: bash -c 'cd frontend && npm run lint'
|
| 31 |
+
language: system
|
| 32 |
+
files: ^frontend/.*\.(ts|tsx|js|jsx)$
|
| 33 |
+
pass_filenames: false
|
| 34 |
+
|
| 35 |
+
- id: frontend-typecheck
|
| 36 |
+
name: frontend-typecheck (tsc)
|
| 37 |
+
entry: bash -c 'cd frontend && npx tsc --noEmit'
|
| 38 |
+
language: system
|
| 39 |
+
files: ^frontend/.*\.(ts|tsx)$
|
| 40 |
+
pass_filenames: false
|
| 41 |
+
|
| 42 |
+
- id: frontend-test
|
| 43 |
+
name: frontend-test (vitest coverage)
|
| 44 |
+
entry: bash -c 'cd frontend && npm run test:coverage'
|
| 45 |
+
language: system
|
| 46 |
+
files: ^frontend/.*\.(ts|tsx)$
|
| 47 |
+
pass_filenames: false
|
| 48 |
+
stages: [pre-push] # Run on push only (tests are slow)
|
| 49 |
+
|
| 50 |
+
# ============================================
|
| 51 |
+
# GENERAL HOOKS
|
| 52 |
+
# ============================================
|
| 53 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 54 |
rev: v6.0.0
|
| 55 |
hooks:
|
BUGS-AUDIT-2024-12-12.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Bug Audit Report - 2024-12-12
|
| 2 |
+
|
| 3 |
+
Consolidated audit findings validated from first principles by reading source code.
|
| 4 |
+
**Status: FIXED** - P1 and P2 bugs addressed in PR #40.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## P1 - High Priority (Functional Bugs)
|
| 9 |
+
|
| 10 |
+
### BUG-005: Exception Handling Converts 400→500
|
| 11 |
+
|
| 12 |
+
**File:** `src/stroke_deepisles_demo/api/routes.py:127-129`
|
| 13 |
+
|
| 14 |
+
**Issue:** The exception handler catches ALL exceptions including HTTPException,
|
| 15 |
+
converting intentional 400 Bad Request responses into 500 Internal Server Errors.
|
| 16 |
+
|
| 17 |
+
```python
|
| 18 |
+
# Lines 96-99: Raises 400 for invalid case_id
|
| 19 |
+
if body.case_id not in valid_cases:
|
| 20 |
+
raise HTTPException(
|
| 21 |
+
status_code=400,
|
| 22 |
+
detail=f"Invalid case ID: '{body.case_id}'. Use GET /api/cases for available cases.",
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Lines 127-129: BUG - catches the HTTPException above and converts to 500
|
| 26 |
+
except Exception:
|
| 27 |
+
logger.exception("Failed to create segmentation job")
|
| 28 |
+
raise HTTPException(status_code=500, detail="Failed to create segmentation job") from None
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
**Impact:** Frontend receives 500 errors for invalid input, making debugging difficult.
|
| 32 |
+
|
| 33 |
+
**Fix:** Add `except HTTPException: raise` before `except Exception:` to re-raise HTTP exceptions.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
### BUG-006: Unbounded Concurrent Inference
|
| 38 |
+
|
| 39 |
+
**Files:** `src/stroke_deepisles_demo/api/routes.py`, `src/stroke_deepisles_demo/api/main.py`
|
| 40 |
+
|
| 41 |
+
**Issue:** No rate limiting, semaphores, or concurrency controls exist. Every request
|
| 42 |
+
spawns a GPU-bound background task without limits.
|
| 43 |
+
|
| 44 |
+
**Validation:** `grep -r "semaphore|rate.?limit|concurrent|max.*jobs|throttl"` returns 0 hits.
|
| 45 |
+
|
| 46 |
+
**Impact:**
|
| 47 |
+
- N concurrent requests = N GPU inference tasks = OOM or GPU memory exhaustion
|
| 48 |
+
- Denial of service risk (intentional or accidental)
|
| 49 |
+
- HF Spaces free tier could be overwhelmed
|
| 50 |
+
|
| 51 |
+
**Fix Options:**
|
| 52 |
+
1. Add `asyncio.Semaphore(1)` to limit concurrent inference
|
| 53 |
+
2. Add job queue depth limit (reject if >N pending jobs)
|
| 54 |
+
3. Add rate limiting middleware (slowapi or similar)
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## P2 - Medium Priority (Resource Leaks, Misconfigurations)
|
| 59 |
+
|
| 60 |
+
### BUG-008: Temp Directory Leak in Convenience Functions
|
| 61 |
+
|
| 62 |
+
**File:** `src/stroke_deepisles_demo/data/__init__.py:20-34`
|
| 63 |
+
|
| 64 |
+
**Issue:** `get_case()` and `list_case_ids()` create dataset instances without using
|
| 65 |
+
the context manager, causing temp directory accumulation.
|
| 66 |
+
|
| 67 |
+
```python
|
| 68 |
+
def get_case(case_id: str | int) -> CaseFiles:
|
| 69 |
+
dataset = load_isles_dataset() # Creates temp dir internally
|
| 70 |
+
return dataset.get_case(case_id) # cleanup() never called!
|
| 71 |
+
|
| 72 |
+
def list_case_ids() -> list[str]:
|
| 73 |
+
dataset = load_isles_dataset() # Creates temp dir internally
|
| 74 |
+
return dataset.list_case_ids() # cleanup() never called!
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
**Validation:** The loader.py docstring explicitly says:
|
| 78 |
+
> "Use as context manager: `with load_isles_dataset() as ds: ...` for automatic cleanup"
|
| 79 |
+
|
| 80 |
+
**Impact:** Unbounded /tmp growth over time, eventual disk exhaustion.
|
| 81 |
+
|
| 82 |
+
**Fix:** Refactor to use context manager or call `dataset.cleanup()` explicitly.
|
| 83 |
+
|
| 84 |
+
---
|
| 85 |
+
|
| 86 |
+
### BUG-009: Results Directory Configuration Drift
|
| 87 |
+
|
| 88 |
+
**Files:**
|
| 89 |
+
- `src/stroke_deepisles_demo/core/config.py:93` → `results_dir: Path = Path("./results")`
|
| 90 |
+
- `src/stroke_deepisles_demo/api/config.py:10` → `RESULTS_DIR = Path("/tmp/stroke-results")`
|
| 91 |
+
|
| 92 |
+
**Issue:** Two different default values for results directory in two config modules.
|
| 93 |
+
API uses `/tmp/stroke-results`, core uses `./results`.
|
| 94 |
+
|
| 95 |
+
**Impact:**
|
| 96 |
+
- Confusion about canonical location
|
| 97 |
+
- Risk of code using wrong config and writing to wrong location
|
| 98 |
+
- core/config.py's `./results` won't work on HF Spaces (not writable)
|
| 99 |
+
|
| 100 |
+
**Fix:** Consolidate to single source of truth. Remove one or make them reference each other.
|
| 101 |
+
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
### BUG-010: Case ID Logged Despite Sensitivity Comment
|
| 105 |
+
|
| 106 |
+
**File:** `src/stroke_deepisles_demo/api/routes.py:264`
|
| 107 |
+
|
| 108 |
+
**Issue:** Code explicitly logs case_id despite comment on line 118 saying not to:
|
| 109 |
+
|
| 110 |
+
```python
|
| 111 |
+
# Line 118 (comment):
|
| 112 |
+
# Note: Don't log case_id as it may be sensitive (medical domain)
|
| 113 |
+
|
| 114 |
+
# Line 264 (violation):
|
| 115 |
+
logger.info(
|
| 116 |
+
"Job %s completed: case=%s, dice=%.3f, time=%.1fs",
|
| 117 |
+
job_id,
|
| 118 |
+
case_id, # <-- Logged here
|
| 119 |
+
result.dice_score or 0,
|
| 120 |
+
result.elapsed_seconds,
|
| 121 |
+
)
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
**Impact:** Potential PHI/PII exposure in logs if case IDs contain patient identifiers.
|
| 125 |
+
|
| 126 |
+
**Fix:** Remove `case=%s` from log format string and remove `case_id` argument.
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
### BUG-011: Frontend Mock Filenames Don't Match Real Backend
|
| 131 |
+
|
| 132 |
+
**Files:**
|
| 133 |
+
- `frontend/src/mocks/handlers.ts:167-168`
|
| 134 |
+
- `src/stroke_deepisles_demo/api/routes.py:246-256`
|
| 135 |
+
- `src/stroke_deepisles_demo/pipeline.py:123`
|
| 136 |
+
- `src/stroke_deepisles_demo/inference/direct.py:85`
|
| 137 |
+
|
| 138 |
+
**Issue:** Mock uses different filenames than actual backend output:
|
| 139 |
+
|
| 140 |
+
| File Type | Mock (handlers.ts) | Real Backend |
|
| 141 |
+
|-----------|-------------------|--------------|
|
| 142 |
+
| DWI | `dwi.nii.gz` | `{case_id}_dwi.nii.gz` |
|
| 143 |
+
| Prediction | `prediction.nii.gz` | `lesion_msk.nii.gz` |
|
| 144 |
+
|
| 145 |
+
**Validation:**
|
| 146 |
+
- Mock: `dwiUrl: \`\${API_BASE}/files/\${jobId}/\${updatedJob.caseId}/dwi.nii.gz\``
|
| 147 |
+
- Backend pipeline.py:123: `dwi_dest = results_dir / f"{resolved_case_id}_dwi.nii.gz"`
|
| 148 |
+
- Backend direct.py:85: `expected_path = output_dir / "lesion_msk.nii.gz"`
|
| 149 |
+
|
| 150 |
+
**Impact:** Frontend tests pass but fail in production. Integration gap.
|
| 151 |
+
|
| 152 |
+
**Fix:** Update mock to match actual backend output filenames.
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## P3 - Low Priority (Observability, Best Practices)
|
| 157 |
+
|
| 158 |
+
### BUG-012: Loose Dependency Pinning
|
| 159 |
+
|
| 160 |
+
**Files:**
|
| 161 |
+
- `pyproject.toml` - Uses `>=` ranges (e.g., `fastapi>=0.115.0`)
|
| 162 |
+
- `frontend/package.json` - Uses `^` ranges (e.g., `"react": "^19.2.0"`)
|
| 163 |
+
|
| 164 |
+
**Issue:** Dependencies use loose version ranges instead of exact pins.
|
| 165 |
+
|
| 166 |
+
**Impact:** Non-reproducible builds. Breaking changes in minor updates could cause silent failures.
|
| 167 |
+
|
| 168 |
+
**Note:** Lock files (uv.lock, package-lock.json) may mitigate this if checked in.
|
| 169 |
+
|
| 170 |
+
---
|
| 171 |
+
|
| 172 |
+
## Validated as FALSE (Not Bugs)
|
| 173 |
+
|
| 174 |
+
### CLAIM: .env tracked despite .gitignore
|
| 175 |
+
**Status:** FALSE
|
| 176 |
+
**Validation:** `git ls-files .env` returns empty. File is not tracked.
|
| 177 |
+
|
| 178 |
+
### CLAIM: CORS missing .static.hf.space variant
|
| 179 |
+
**Status:** LIKELY FALSE
|
| 180 |
+
**Validation:** HuggingFace Static Spaces use `.hf.space` domain, not `.static.hf.space`.
|
| 181 |
+
The current CORS config includes `https://vibecodermcswaggins-stroke-viewer-frontend.hf.space`.
|
| 182 |
+
|
| 183 |
+
---
|
| 184 |
+
|
| 185 |
+
## Fix Status
|
| 186 |
+
|
| 187 |
+
| Bug | Status | PR |
|
| 188 |
+
|-----|--------|-----|
|
| 189 |
+
| BUG-005 | ✅ FIXED | #40 |
|
| 190 |
+
| BUG-006 | ✅ FIXED | #40 |
|
| 191 |
+
| BUG-008 | ✅ FIXED | #40 |
|
| 192 |
+
| BUG-009 | 🔄 DEFERRED | See NEXT-CONCERNS.md |
|
| 193 |
+
| BUG-010 | ✅ FIXED | #40 |
|
| 194 |
+
| BUG-011 | ✅ FIXED | #40 |
|
| 195 |
+
| BUG-012 | 🔄 DEFERRED | See NEXT-CONCERNS.md |
|
| 196 |
+
|
| 197 |
+
## Notes
|
| 198 |
+
|
| 199 |
+
1. **BUG-004 (StaticFiles CORS bypass)** was already fixed in the codebase via `files.py` router.
|
| 200 |
+
The fix comment at `api/main.py:131-132` documents this.
|
| 201 |
+
|
| 202 |
+
2. **BUG-007 (No Authentication)** was removed from audit - intentional design for public demo.
|
| 203 |
+
|
| 204 |
+
---
|
| 205 |
+
|
| 206 |
+
**Audited by:** Claude Code
|
| 207 |
+
**Date:** 2024-12-12
|
| 208 |
+
**Fixed:** 2024-12-12 (PR #40)
|
Dockerfile
CHANGED
|
@@ -75,9 +75,13 @@ EXPOSE 7860
|
|
| 75 |
# Reset ENTRYPOINT from base image
|
| 76 |
ENTRYPOINT []
|
| 77 |
|
| 78 |
-
# Explicit frontend origin for CORS
|
| 79 |
ENV FRONTEND_ORIGIN=https://vibecodermcswaggins-stroke-viewer-frontend.hf.space
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# Run FastAPI with uvicorn (module path: stroke_deepisles_demo.api.main:app)
|
| 82 |
# --proxy-headers: Trust X-Forwarded-Proto from HF Spaces proxy (ensures https:// in request.base_url)
|
| 83 |
CMD ["uvicorn", "stroke_deepisles_demo.api.main:app", "--host", "0.0.0.0", "--port", "7860", "--proxy-headers"]
|
|
|
|
| 75 |
# Reset ENTRYPOINT from base image
|
| 76 |
ENTRYPOINT []
|
| 77 |
|
| 78 |
+
# Explicit frontend origin for CORS
|
| 79 |
ENV FRONTEND_ORIGIN=https://vibecodermcswaggins-stroke-viewer-frontend.hf.space
|
| 80 |
|
| 81 |
+
# Explicit backend public URL for constructing file URLs
|
| 82 |
+
# This ensures correct https:// URLs even if proxy headers aren't forwarded correctly
|
| 83 |
+
ENV BACKEND_PUBLIC_URL=https://vibecodermcswaggins-stroke-deepisles-demo.hf.space
|
| 84 |
+
|
| 85 |
# Run FastAPI with uvicorn (module path: stroke_deepisles_demo.api.main:app)
|
| 86 |
# --proxy-headers: Trust X-Forwarded-Proto from HF Spaces proxy (ensures https:// in request.base_url)
|
| 87 |
CMD ["uvicorn", "stroke_deepisles_demo.api.main:app", "--host", "0.0.0.0", "--port", "7860", "--proxy-headers"]
|
NEXT-CONCERNS.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Next Concerns - To Investigate
|
| 2 |
+
|
| 3 |
+
Deferred items from BUGS-AUDIT-2024-12-12.md that need deeper investigation.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## BUG-009: Results Directory Configuration Drift
|
| 8 |
+
|
| 9 |
+
**Priority:** P2 (Medium)
|
| 10 |
+
|
| 11 |
+
### Current State
|
| 12 |
+
|
| 13 |
+
Two files define where results go:
|
| 14 |
+
- `core/config.py:93` → `results_dir: Path = Path("./results")`
|
| 15 |
+
- `api/config.py:10` → `RESULTS_DIR = Path("/tmp/stroke-results")`
|
| 16 |
+
|
| 17 |
+
### Investigation Needed
|
| 18 |
+
|
| 19 |
+
1. **Does anything actually use `core/config.py`'s `results_dir`?**
|
| 20 |
+
- Grep codebase for imports from `core/config`
|
| 21 |
+
- If nothing uses it, delete the duplicate
|
| 22 |
+
- If something uses it, consolidate to single source of truth
|
| 23 |
+
|
| 24 |
+
2. **Gold standard pattern:**
|
| 25 |
+
```python
|
| 26 |
+
# api/config.py (SSOT)
|
| 27 |
+
RESULTS_DIR = Path("/tmp/stroke-results")
|
| 28 |
+
|
| 29 |
+
# core/config.py (if needed)
|
| 30 |
+
from stroke_deepisles_demo.api.config import RESULTS_DIR
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
### Action
|
| 34 |
+
|
| 35 |
+
Run investigation, then either:
|
| 36 |
+
- Delete unused `core/config.py` results_dir, OR
|
| 37 |
+
- Make it import from `api/config.py`
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## BUG-012: Loose Dependency Pinning
|
| 42 |
+
|
| 43 |
+
**Priority:** P3 (Low)
|
| 44 |
+
|
| 45 |
+
### Current State
|
| 46 |
+
|
| 47 |
+
- `pyproject.toml` uses `>=` ranges (e.g., `fastapi>=0.115.0`)
|
| 48 |
+
- `package.json` uses `^` ranges (e.g., `"react": "^19.2.0"`)
|
| 49 |
+
|
| 50 |
+
### Reality Check
|
| 51 |
+
|
| 52 |
+
Lock files exist and are committed:
|
| 53 |
+
- `uv.lock` → Backend installs are deterministic
|
| 54 |
+
- `package-lock.json` → Frontend installs are deterministic
|
| 55 |
+
|
| 56 |
+
**Builds ARE reproducible** if you use the lock files.
|
| 57 |
+
|
| 58 |
+
### Investigation Needed
|
| 59 |
+
|
| 60 |
+
1. Verify `uv.lock` is committed: `git ls-files uv.lock`
|
| 61 |
+
2. Verify Docker uses `uv sync` (respects lock) not `uv pip install` (ignores lock)
|
| 62 |
+
3. Verify CI uses lock files
|
| 63 |
+
|
| 64 |
+
### Action
|
| 65 |
+
|
| 66 |
+
If lock files are committed and used correctly:
|
| 67 |
+
- Close as "mitigated by lock files"
|
| 68 |
+
- Optionally add README note about reproducible builds
|
| 69 |
+
|
| 70 |
+
If lock files are NOT being used:
|
| 71 |
+
- Fix CI/Docker to use them
|
| 72 |
+
- Document requirement
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
**Created:** 2024-12-12
|
| 77 |
+
**Status:** Pending investigation
|
BUGS-AUDIT-DEEP-DIVE.md → docs/bugs/BUGS-AUDIT-DEEP-DIVE.md
RENAMED
|
File without changes
|
BUGS-HF-SPACES-INTEGRATION.md → docs/bugs/BUGS-HF-SPACES-INTEGRATION.md
RENAMED
|
File without changes
|
HF_SPACES_UI_BROKEN_AUDIT.md → docs/bugs/HF_SPACES_UI_BROKEN_AUDIT.md
RENAMED
|
File without changes
|
frontend/README.md
CHANGED
|
@@ -99,7 +99,7 @@ If you fork this repository, update these files before deploying:
|
|
| 99 |
```
|
| 100 |
|
| 101 |
2. **Backend CORS** (`src/stroke_deepisles_demo/api/main.py`):
|
| 102 |
-
|
| 103 |
|
| 104 |
3. **Rebuild frontend**:
|
| 105 |
```bash
|
|
|
|
| 99 |
```
|
| 100 |
|
| 101 |
2. **Backend CORS** (`src/stroke_deepisles_demo/api/main.py`):
|
| 102 |
+
Add your frontend URL to the `CORS_ORIGINS` list, or set `FRONTEND_ORIGIN` env var.
|
| 103 |
|
| 104 |
3. **Rebuild frontend**:
|
| 105 |
```bash
|
frontend/src/components/CaseSelector.tsx
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
import { useEffect, useState } from "react";
|
| 2 |
-
import { apiClient } from "../api/client";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
interface CaseSelectorProps {
|
| 5 |
selectedCase: string | null;
|
|
@@ -13,36 +18,84 @@ export function CaseSelector({
|
|
| 13 |
const [cases, setCases] = useState<string[]>([]);
|
| 14 |
const [isLoading, setIsLoading] = useState(true);
|
| 15 |
const [error, setError] = useState<string | null>(null);
|
|
|
|
|
|
|
| 16 |
|
|
|
|
|
|
|
| 17 |
useEffect(() => {
|
|
|
|
| 18 |
const abortController = new AbortController();
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
setIsLoading(false);
|
|
|
|
| 33 |
}
|
| 34 |
}
|
| 35 |
-
}
|
| 36 |
|
| 37 |
fetchCases();
|
| 38 |
|
| 39 |
-
return () =>
|
|
|
|
|
|
|
|
|
|
| 40 |
}, []);
|
| 41 |
|
| 42 |
if (isLoading) {
|
| 43 |
return (
|
| 44 |
<div className="bg-gray-800 rounded-lg p-4">
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
</div>
|
| 47 |
);
|
| 48 |
}
|
|
|
|
| 1 |
import { useEffect, useState } from "react";
|
| 2 |
+
import { apiClient, ApiError } from "../api/client";
|
| 3 |
+
|
| 4 |
+
// Cold start retry configuration (matches useSegmentation.ts)
|
| 5 |
+
const MAX_COLD_START_RETRIES = 5;
|
| 6 |
+
const INITIAL_RETRY_DELAY = 2000;
|
| 7 |
+
const MAX_RETRY_DELAY = 30000;
|
| 8 |
|
| 9 |
interface CaseSelectorProps {
|
| 10 |
selectedCase: string | null;
|
|
|
|
| 18 |
const [cases, setCases] = useState<string[]>([]);
|
| 19 |
const [isLoading, setIsLoading] = useState(true);
|
| 20 |
const [error, setError] = useState<string | null>(null);
|
| 21 |
+
const [retryCount, setRetryCount] = useState(0);
|
| 22 |
+
const [isWakingUp, setIsWakingUp] = useState(false);
|
| 23 |
|
| 24 |
+
// Fetch cases on mount with cold-start retry logic
|
| 25 |
+
// Using inline async function pattern recommended by React docs for data fetching
|
| 26 |
useEffect(() => {
|
| 27 |
+
let isActive = true;
|
| 28 |
const abortController = new AbortController();
|
| 29 |
|
| 30 |
+
async function fetchCases() {
|
| 31 |
+
let attempts = 0;
|
| 32 |
+
|
| 33 |
+
while (attempts <= MAX_COLD_START_RETRIES && isActive) {
|
| 34 |
+
try {
|
| 35 |
+
const data = await apiClient.getCases(abortController.signal);
|
| 36 |
+
if (!isActive) return;
|
| 37 |
+
setCases(data.cases);
|
| 38 |
+
setIsWakingUp(false);
|
| 39 |
+
setRetryCount(0);
|
| 40 |
+
setIsLoading(false);
|
| 41 |
+
return; // Success
|
| 42 |
+
} catch (err) {
|
| 43 |
+
if (!isActive) return;
|
| 44 |
+
if (err instanceof Error && err.name === "AbortError") return;
|
| 45 |
+
|
| 46 |
+
const is503 = err instanceof ApiError && err.status === 503;
|
| 47 |
+
const isNetworkError =
|
| 48 |
+
err instanceof TypeError &&
|
| 49 |
+
err.message.toLowerCase().includes("fetch");
|
| 50 |
|
| 51 |
+
// Retry on cold start (503) or network errors
|
| 52 |
+
if ((is503 || isNetworkError) && attempts < MAX_COLD_START_RETRIES) {
|
| 53 |
+
attempts++;
|
| 54 |
+
setRetryCount(attempts);
|
| 55 |
+
setIsWakingUp(true);
|
| 56 |
+
|
| 57 |
+
// Exponential backoff
|
| 58 |
+
const delay = Math.min(
|
| 59 |
+
INITIAL_RETRY_DELAY * Math.pow(2, attempts - 1),
|
| 60 |
+
MAX_RETRY_DELAY,
|
| 61 |
+
);
|
| 62 |
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
| 63 |
+
continue;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
// Max retries exceeded or non-retryable error
|
| 67 |
+
const message =
|
| 68 |
+
is503 || isNetworkError
|
| 69 |
+
? "Backend failed to wake up. Please refresh the page."
|
| 70 |
+
: err instanceof Error
|
| 71 |
+
? err.message
|
| 72 |
+
: "Unknown error";
|
| 73 |
+
setError(`Failed to load cases: ${message}`);
|
| 74 |
+
setIsWakingUp(false);
|
| 75 |
setIsLoading(false);
|
| 76 |
+
return;
|
| 77 |
}
|
| 78 |
}
|
| 79 |
+
}
|
| 80 |
|
| 81 |
fetchCases();
|
| 82 |
|
| 83 |
+
return () => {
|
| 84 |
+
isActive = false;
|
| 85 |
+
abortController.abort();
|
| 86 |
+
};
|
| 87 |
}, []);
|
| 88 |
|
| 89 |
if (isLoading) {
|
| 90 |
return (
|
| 91 |
<div className="bg-gray-800 rounded-lg p-4">
|
| 92 |
+
{isWakingUp ? (
|
| 93 |
+
<p className="text-yellow-400">
|
| 94 |
+
Backend waking up... Retry {retryCount}/{MAX_COLD_START_RETRIES}
|
| 95 |
+
</p>
|
| 96 |
+
) : (
|
| 97 |
+
<p className="text-gray-400">Loading cases...</p>
|
| 98 |
+
)}
|
| 99 |
</div>
|
| 100 |
);
|
| 101 |
}
|
frontend/src/components/__tests__/CaseSelector.test.tsx
CHANGED
|
@@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
| 2 |
import { render, screen, waitFor } from "@testing-library/react";
|
| 3 |
import userEvent from "@testing-library/user-event";
|
| 4 |
import { server } from "../../mocks/server";
|
| 5 |
-
import { errorHandlers } from "../../mocks/handlers";
|
| 6 |
import { CaseSelector } from "../CaseSelector";
|
| 7 |
|
| 8 |
describe("CaseSelector", () => {
|
|
@@ -10,6 +10,7 @@ describe("CaseSelector", () => {
|
|
| 10 |
|
| 11 |
beforeEach(() => {
|
| 12 |
mockOnSelectCase.mockClear();
|
|
|
|
| 13 |
});
|
| 14 |
|
| 15 |
it("shows loading state initially", () => {
|
|
@@ -117,4 +118,32 @@ describe("CaseSelector", () => {
|
|
| 117 |
const container = screen.getByRole("combobox").closest("div");
|
| 118 |
expect(container).toHaveClass("bg-gray-800");
|
| 119 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
});
|
|
|
|
| 2 |
import { render, screen, waitFor } from "@testing-library/react";
|
| 3 |
import userEvent from "@testing-library/user-event";
|
| 4 |
import { server } from "../../mocks/server";
|
| 5 |
+
import { errorHandlers, resetCasesAttempts } from "../../mocks/handlers";
|
| 6 |
import { CaseSelector } from "../CaseSelector";
|
| 7 |
|
| 8 |
describe("CaseSelector", () => {
|
|
|
|
| 10 |
|
| 11 |
beforeEach(() => {
|
| 12 |
mockOnSelectCase.mockClear();
|
| 13 |
+
resetCasesAttempts();
|
| 14 |
});
|
| 15 |
|
| 16 |
it("shows loading state initially", () => {
|
|
|
|
| 118 |
const container = screen.getByRole("combobox").closest("div");
|
| 119 |
expect(container).toHaveClass("bg-gray-800");
|
| 120 |
});
|
| 121 |
+
|
| 122 |
+
it("retries on 503 cold-start and succeeds", async () => {
|
| 123 |
+
server.use(errorHandlers.casesColdStart);
|
| 124 |
+
|
| 125 |
+
render(
|
| 126 |
+
<CaseSelector selectedCase={null} onSelectCase={mockOnSelectCase} />,
|
| 127 |
+
);
|
| 128 |
+
|
| 129 |
+
// Should show waking up message during retry
|
| 130 |
+
await waitFor(
|
| 131 |
+
() => {
|
| 132 |
+
expect(screen.getByText(/waking up/i)).toBeInTheDocument();
|
| 133 |
+
},
|
| 134 |
+
{ timeout: 3000 },
|
| 135 |
+
);
|
| 136 |
+
|
| 137 |
+
// Should eventually succeed and show cases
|
| 138 |
+
await waitFor(
|
| 139 |
+
() => {
|
| 140 |
+
expect(screen.getByRole("combobox")).toBeInTheDocument();
|
| 141 |
+
},
|
| 142 |
+
{ timeout: 5000 },
|
| 143 |
+
);
|
| 144 |
+
|
| 145 |
+
expect(
|
| 146 |
+
screen.getByRole("option", { name: /sub-stroke0001/i }),
|
| 147 |
+
).toBeInTheDocument();
|
| 148 |
+
});
|
| 149 |
});
|
frontend/src/hooks/useSegmentation.ts
CHANGED
|
@@ -111,7 +111,19 @@ export function useSegmentation() {
|
|
| 111 |
// Ignore abort errors
|
| 112 |
if (err instanceof Error && err.name === "AbortError") return;
|
| 113 |
|
| 114 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
console.warn("Polling error (will retry):", err);
|
| 116 |
}
|
| 117 |
},
|
|
|
|
| 111 |
// Ignore abort errors
|
| 112 |
if (err instanceof Error && err.name === "AbortError") return;
|
| 113 |
|
| 114 |
+
// 404 = job expired or lost (HF restart) - this is TERMINAL, not transient
|
| 115 |
+
if (err instanceof ApiError && err.status === 404) {
|
| 116 |
+
stopPolling();
|
| 117 |
+
setIsLoading(false);
|
| 118 |
+
setJobStatus("failed");
|
| 119 |
+
setError(
|
| 120 |
+
"Job expired or lost. This can happen if the backend restarted. Please try again.",
|
| 121 |
+
);
|
| 122 |
+
setResult(null);
|
| 123 |
+
return;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
// Other errors (network, 5xx) - retry next interval
|
| 127 |
console.warn("Polling error (will retry):", err);
|
| 128 |
}
|
| 129 |
},
|
frontend/src/mocks/handlers.ts
CHANGED
|
@@ -159,13 +159,14 @@ export const handlers = [
|
|
| 159 |
|
| 160 |
// Include result if completed
|
| 161 |
if (updatedJob.status === "completed") {
|
|
|
|
| 162 |
response.result = {
|
| 163 |
caseId: updatedJob.caseId,
|
| 164 |
diceScore: 0.847,
|
| 165 |
volumeMl: 15.32,
|
| 166 |
elapsedSeconds: updatedJob.fastMode ? 12.5 : 45.0,
|
| 167 |
-
dwiUrl: `${API_BASE}/files/${jobId}/${updatedJob.caseId}
|
| 168 |
-
predictionUrl: `${API_BASE}/files/${jobId}/${updatedJob.caseId}/
|
| 169 |
};
|
| 170 |
}
|
| 171 |
|
|
@@ -173,6 +174,14 @@ export const handlers = [
|
|
| 173 |
}),
|
| 174 |
];
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
// Error handlers for testing error states
|
| 177 |
export const errorHandlers = {
|
| 178 |
casesServerError: http.get(`${API_BASE}/api/cases`, () => {
|
|
@@ -186,6 +195,21 @@ export const errorHandlers = {
|
|
| 186 |
return HttpResponse.error();
|
| 187 |
}),
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
segmentCreateError: http.post(`${API_BASE}/api/segment`, () => {
|
| 190 |
return HttpResponse.json(
|
| 191 |
{ detail: "Failed to create job: case not found" },
|
|
|
|
| 159 |
|
| 160 |
// Include result if completed
|
| 161 |
if (updatedJob.status === "completed") {
|
| 162 |
+
// Filenames must match actual backend output format
|
| 163 |
response.result = {
|
| 164 |
caseId: updatedJob.caseId,
|
| 165 |
diceScore: 0.847,
|
| 166 |
volumeMl: 15.32,
|
| 167 |
elapsedSeconds: updatedJob.fastMode ? 12.5 : 45.0,
|
| 168 |
+
dwiUrl: `${API_BASE}/files/${jobId}/${updatedJob.caseId}/${updatedJob.caseId}_dwi.nii.gz`,
|
| 169 |
+
predictionUrl: `${API_BASE}/files/${jobId}/${updatedJob.caseId}/lesion_msk.nii.gz`,
|
| 170 |
};
|
| 171 |
}
|
| 172 |
|
|
|
|
| 174 |
}),
|
| 175 |
];
|
| 176 |
|
| 177 |
+
// Track retry attempts for cold-start testing
|
| 178 |
+
let casesAttempts = 0;
|
| 179 |
+
|
| 180 |
+
/** Reset the cases attempt counter (call in test beforeEach) */
|
| 181 |
+
export function resetCasesAttempts(): void {
|
| 182 |
+
casesAttempts = 0;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
// Error handlers for testing error states
|
| 186 |
export const errorHandlers = {
|
| 187 |
casesServerError: http.get(`${API_BASE}/api/cases`, () => {
|
|
|
|
| 195 |
return HttpResponse.error();
|
| 196 |
}),
|
| 197 |
|
| 198 |
+
// 503 on first attempt, success on retry (tests cold-start retry)
|
| 199 |
+
casesColdStart: http.get(`${API_BASE}/api/cases`, async () => {
|
| 200 |
+
casesAttempts++;
|
| 201 |
+
if (casesAttempts === 1) {
|
| 202 |
+
return HttpResponse.json(
|
| 203 |
+
{ detail: "Service Unavailable" },
|
| 204 |
+
{ status: 503 },
|
| 205 |
+
);
|
| 206 |
+
}
|
| 207 |
+
// Succeed on retry
|
| 208 |
+
return HttpResponse.json({
|
| 209 |
+
cases: ["sub-stroke0001", "sub-stroke0002", "sub-stroke0003"],
|
| 210 |
+
});
|
| 211 |
+
}),
|
| 212 |
+
|
| 213 |
segmentCreateError: http.post(`${API_BASE}/api/segment`, () => {
|
| 214 |
return HttpResponse.json(
|
| 215 |
{ detail: "Failed to create job: case not found" },
|
src/stroke_deepisles_demo/api/config.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""API configuration constants.
|
| 2 |
+
|
| 3 |
+
Single source of truth for API configuration values.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
# Results directory for job outputs (must be in /tmp for HF Spaces)
|
| 9 |
+
# CRITICAL: This is the single source of truth. Import this instead of hardcoding.
|
| 10 |
+
RESULTS_DIR = Path("/tmp/stroke-results")
|
| 11 |
+
|
| 12 |
+
# Maximum active jobs (pending + running) accepted by the API
|
| 13 |
+
# This limits how many jobs can be queued/running at once, NOT serialized GPU execution
|
| 14 |
+
# T4 GPU (16GB) can handle ~1-2 concurrent DeepISLES inferences safely
|
| 15 |
+
# Value of 10 allows reasonable queue depth while preventing unbounded accumulation
|
| 16 |
+
MAX_CONCURRENT_JOBS = 10
|
src/stroke_deepisles_demo/api/files.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""File serving routes for NIfTI result files.
|
| 2 |
+
|
| 3 |
+
BUG-004 FIX: This module replaces the StaticFiles mount approach.
|
| 4 |
+
|
| 5 |
+
Previously, files were served via:
|
| 6 |
+
app.mount("/files", StaticFiles(directory=RESULTS_DIR))
|
| 7 |
+
|
| 8 |
+
The problem: StaticFiles is a mounted sub-application, and FastAPI/Starlette
|
| 9 |
+
middleware (including CORSMiddleware) does NOT propagate to mounted apps.
|
| 10 |
+
This caused NiiVue's cross-origin fetch to fail with "Failed to fetch".
|
| 11 |
+
|
| 12 |
+
Solution: Use explicit route handlers that go through the main app's middleware.
|
| 13 |
+
Now CORS headers are correctly applied to file responses.
|
| 14 |
+
|
| 15 |
+
Reference: https://github.com/fastapi/fastapi/discussions/7319
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from fastapi import APIRouter, HTTPException
|
| 19 |
+
from fastapi.responses import FileResponse
|
| 20 |
+
|
| 21 |
+
from stroke_deepisles_demo.api.config import RESULTS_DIR
|
| 22 |
+
from stroke_deepisles_demo.core.logging import get_logger
|
| 23 |
+
|
| 24 |
+
logger = get_logger(__name__)
|
| 25 |
+
|
| 26 |
+
files_router = APIRouter(prefix="/files", tags=["files"])
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@files_router.get("/{job_id}/{case_id}/{filename}")
|
| 30 |
+
async def get_result_file(job_id: str, case_id: str, filename: str) -> FileResponse:
|
| 31 |
+
"""Serve NIfTI result files with proper CORS headers.
|
| 32 |
+
|
| 33 |
+
This route goes through the main FastAPI app's middleware stack,
|
| 34 |
+
ensuring CORS and CORP headers are applied to the response.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
job_id: The job UUID from segmentation
|
| 38 |
+
case_id: The case identifier (e.g., sub-stroke0001)
|
| 39 |
+
filename: The NIfTI filename (e.g., dwi.nii.gz, prediction_fused.nii.gz)
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
FileResponse with the NIfTI file
|
| 43 |
+
|
| 44 |
+
Raises:
|
| 45 |
+
404: File not found (job expired, invalid path, or doesn't exist)
|
| 46 |
+
"""
|
| 47 |
+
# Construct file path
|
| 48 |
+
file_path = RESULTS_DIR / job_id / case_id / filename
|
| 49 |
+
|
| 50 |
+
# Security: Ensure path doesn't escape RESULTS_DIR (path traversal protection)
|
| 51 |
+
# Using is_relative_to() instead of startswith() to prevent prefix-collision bypass
|
| 52 |
+
# e.g., /tmp/stroke-results-evil/file.txt would pass startswith but fail is_relative_to
|
| 53 |
+
try:
|
| 54 |
+
base_dir = RESULTS_DIR.resolve()
|
| 55 |
+
resolved = file_path.resolve()
|
| 56 |
+
if not resolved.is_relative_to(base_dir):
|
| 57 |
+
logger.warning("Path traversal attempt blocked: %s", filename)
|
| 58 |
+
raise HTTPException(status_code=404, detail="File not found")
|
| 59 |
+
except (OSError, ValueError):
|
| 60 |
+
raise HTTPException(status_code=404, detail="Invalid file path") from None
|
| 61 |
+
|
| 62 |
+
# Check file exists
|
| 63 |
+
if not resolved.exists() or not resolved.is_file():
|
| 64 |
+
logger.debug("File not found: %s", resolved)
|
| 65 |
+
raise HTTPException(
|
| 66 |
+
status_code=404,
|
| 67 |
+
detail=f"File not found: {filename}. Job may have expired (1 hour TTL).",
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Determine media type based on extension
|
| 71 |
+
# NIfTI files are typically gzip-compressed
|
| 72 |
+
if filename.endswith(".nii.gz"):
|
| 73 |
+
media_type = "application/gzip"
|
| 74 |
+
elif filename.endswith(".nii"):
|
| 75 |
+
media_type = "application/octet-stream"
|
| 76 |
+
else:
|
| 77 |
+
media_type = "application/octet-stream"
|
| 78 |
+
|
| 79 |
+
logger.debug("Serving file: %s (type: %s)", resolved, media_type)
|
| 80 |
+
|
| 81 |
+
return FileResponse(
|
| 82 |
+
path=resolved,
|
| 83 |
+
media_type=media_type,
|
| 84 |
+
filename=filename,
|
| 85 |
+
)
|
src/stroke_deepisles_demo/api/job_store.py
CHANGED
|
@@ -23,11 +23,14 @@ import threading
|
|
| 23 |
from dataclasses import dataclass
|
| 24 |
from datetime import datetime, timedelta
|
| 25 |
from enum import Enum
|
| 26 |
-
from
|
| 27 |
-
from typing import Any
|
| 28 |
|
|
|
|
| 29 |
from stroke_deepisles_demo.core.logging import get_logger
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
logger = get_logger(__name__)
|
| 32 |
|
| 33 |
# Regex for safe job IDs (alphanumeric, hyphens, underscores only)
|
|
@@ -135,7 +138,7 @@ class JobStore:
|
|
| 135 |
self._jobs: dict[str, Job] = {}
|
| 136 |
self._lock = threading.RLock()
|
| 137 |
self._ttl = ttl
|
| 138 |
-
self._results_dir = results_dir or
|
| 139 |
self._cleanup_thread: threading.Thread | None = None
|
| 140 |
self._shutdown = threading.Event()
|
| 141 |
|
|
@@ -147,6 +150,18 @@ class JobStore:
|
|
| 147 |
"""
|
| 148 |
return bool(job_id) and _SAFE_JOB_ID_PATTERN.match(job_id) is not None
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
def create_job(self, job_id: str, case_id: str, fast_mode: bool) -> Job:
|
| 151 |
"""Create a new job in PENDING status.
|
| 152 |
|
|
|
|
| 23 |
from dataclasses import dataclass
|
| 24 |
from datetime import datetime, timedelta
|
| 25 |
from enum import Enum
|
| 26 |
+
from typing import TYPE_CHECKING, Any
|
|
|
|
| 27 |
|
| 28 |
+
from stroke_deepisles_demo.api.config import RESULTS_DIR
|
| 29 |
from stroke_deepisles_demo.core.logging import get_logger
|
| 30 |
|
| 31 |
+
if TYPE_CHECKING:
|
| 32 |
+
from pathlib import Path
|
| 33 |
+
|
| 34 |
logger = get_logger(__name__)
|
| 35 |
|
| 36 |
# Regex for safe job IDs (alphanumeric, hyphens, underscores only)
|
|
|
|
| 138 |
self._jobs: dict[str, Job] = {}
|
| 139 |
self._lock = threading.RLock()
|
| 140 |
self._ttl = ttl
|
| 141 |
+
self._results_dir = results_dir or RESULTS_DIR
|
| 142 |
self._cleanup_thread: threading.Thread | None = None
|
| 143 |
self._shutdown = threading.Event()
|
| 144 |
|
|
|
|
| 150 |
"""
|
| 151 |
return bool(job_id) and _SAFE_JOB_ID_PATTERN.match(job_id) is not None
|
| 152 |
|
| 153 |
+
def get_active_job_count(self) -> int:
|
| 154 |
+
"""Return the number of active (pending or running) jobs.
|
| 155 |
+
|
| 156 |
+
Used for concurrency limiting to prevent GPU memory exhaustion.
|
| 157 |
+
"""
|
| 158 |
+
with self._lock:
|
| 159 |
+
return sum(
|
| 160 |
+
1
|
| 161 |
+
for job in self._jobs.values()
|
| 162 |
+
if job.status in (JobStatus.PENDING, JobStatus.RUNNING)
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
def create_job(self, job_id: str, case_id: str, fast_mode: bool) -> Job:
|
| 166 |
"""Create a new job in PENDING status.
|
| 167 |
|
src/stroke_deepisles_demo/api/main.py
CHANGED
|
@@ -16,23 +16,20 @@ Architecture designed to work within HuggingFace Spaces constraints:
|
|
| 16 |
import os
|
| 17 |
from collections.abc import AsyncIterator
|
| 18 |
from contextlib import asynccontextmanager
|
| 19 |
-
from pathlib import Path
|
| 20 |
from typing import Any
|
| 21 |
|
| 22 |
from fastapi import FastAPI, Request, Response
|
| 23 |
from fastapi.middleware.cors import CORSMiddleware
|
| 24 |
-
from fastapi.staticfiles import StaticFiles
|
| 25 |
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
| 26 |
|
|
|
|
|
|
|
| 27 |
from stroke_deepisles_demo.api.job_store import init_job_store
|
| 28 |
from stroke_deepisles_demo.api.routes import router
|
| 29 |
from stroke_deepisles_demo.core.logging import get_logger
|
| 30 |
|
| 31 |
logger = get_logger(__name__)
|
| 32 |
|
| 33 |
-
# Results directory (must be in /tmp for HF Spaces)
|
| 34 |
-
RESULTS_DIR = Path("/tmp/stroke-results")
|
| 35 |
-
|
| 36 |
|
| 37 |
@asynccontextmanager
|
| 38 |
async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
|
|
@@ -115,24 +112,25 @@ if FRONTEND_ORIGIN and FRONTEND_ORIGIN not in CORS_ORIGINS:
|
|
| 115 |
# Add CORP middleware first (for COEP compatibility)
|
| 116 |
app.add_middleware(CORPMiddleware)
|
| 117 |
|
| 118 |
-
# Add CORS middleware with
|
| 119 |
# Note: Using allow_origins list for exact matching (no regex needed)
|
| 120 |
# This eliminates regex security concerns while maintaining single source of truth
|
| 121 |
app.add_middleware(
|
| 122 |
CORSMiddleware,
|
| 123 |
allow_origins=CORS_ORIGINS,
|
| 124 |
allow_credentials=False, # Not needed - no cookies/auth
|
| 125 |
-
allow_methods=["GET", "POST"], #
|
| 126 |
-
allow_headers=["Content-Type"], #
|
|
|
|
| 127 |
)
|
| 128 |
|
| 129 |
-
# API routes
|
| 130 |
app.include_router(router, prefix="/api")
|
| 131 |
|
| 132 |
-
#
|
| 133 |
-
#
|
| 134 |
-
|
| 135 |
-
app.
|
| 136 |
|
| 137 |
|
| 138 |
@app.get("/")
|
|
|
|
| 16 |
import os
|
| 17 |
from collections.abc import AsyncIterator
|
| 18 |
from contextlib import asynccontextmanager
|
|
|
|
| 19 |
from typing import Any
|
| 20 |
|
| 21 |
from fastapi import FastAPI, Request, Response
|
| 22 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 23 |
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
| 24 |
|
| 25 |
+
from stroke_deepisles_demo.api.config import RESULTS_DIR
|
| 26 |
+
from stroke_deepisles_demo.api.files import files_router
|
| 27 |
from stroke_deepisles_demo.api.job_store import init_job_store
|
| 28 |
from stroke_deepisles_demo.api.routes import router
|
| 29 |
from stroke_deepisles_demo.core.logging import get_logger
|
| 30 |
|
| 31 |
logger = get_logger(__name__)
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
@asynccontextmanager
|
| 35 |
async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
|
|
|
|
| 112 |
# Add CORP middleware first (for COEP compatibility)
|
| 113 |
app.add_middleware(CORPMiddleware)
|
| 114 |
|
| 115 |
+
# Add CORS middleware with settings for NiiVue binary file fetching
|
| 116 |
# Note: Using allow_origins list for exact matching (no regex needed)
|
| 117 |
# This eliminates regex security concerns while maintaining single source of truth
|
| 118 |
app.add_middleware(
|
| 119 |
CORSMiddleware,
|
| 120 |
allow_origins=CORS_ORIGINS,
|
| 121 |
allow_credentials=False, # Not needed - no cookies/auth
|
| 122 |
+
allow_methods=["GET", "POST", "HEAD"], # HEAD for preflight checks
|
| 123 |
+
allow_headers=["Content-Type", "Range"], # Range needed for partial content requests
|
| 124 |
+
expose_headers=["Content-Range", "Content-Length", "Accept-Ranges"], # NiiVue needs these
|
| 125 |
)
|
| 126 |
|
| 127 |
+
# API routes (includes /api/* endpoints)
|
| 128 |
app.include_router(router, prefix="/api")
|
| 129 |
|
| 130 |
+
# File routes (serves NIfTI results through main app's middleware for CORS)
|
| 131 |
+
# BUG-004 FIX: Previously used StaticFiles mount which bypassed CORS middleware.
|
| 132 |
+
# Now using explicit routes so CORS headers are applied to file responses.
|
| 133 |
+
app.include_router(files_router)
|
| 134 |
|
| 135 |
|
| 136 |
@app.get("/")
|
src/stroke_deepisles_demo/api/routes.py
CHANGED
|
@@ -12,10 +12,10 @@ from __future__ import annotations
|
|
| 12 |
|
| 13 |
import os
|
| 14 |
import uuid
|
| 15 |
-
from pathlib import Path
|
| 16 |
|
| 17 |
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
| 18 |
|
|
|
|
| 19 |
from stroke_deepisles_demo.api.job_store import JobStatus, get_job_store
|
| 20 |
from stroke_deepisles_demo.api.schemas import (
|
| 21 |
CasesResponse,
|
|
@@ -33,9 +33,6 @@ logger = get_logger(__name__)
|
|
| 33 |
|
| 34 |
router = APIRouter()
|
| 35 |
|
| 36 |
-
# Base directory for results
|
| 37 |
-
RESULTS_BASE = Path("/tmp/stroke-results")
|
| 38 |
-
|
| 39 |
|
| 40 |
def get_backend_base_url(request: Request) -> str:
|
| 41 |
"""Get the backend's public URL for building absolute file URLs.
|
|
@@ -93,7 +90,15 @@ def create_segment_job(
|
|
| 93 |
- Returning immediately avoids timeout errors
|
| 94 |
"""
|
| 95 |
try:
|
| 96 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
valid_cases = list_case_ids()
|
| 98 |
if body.case_id not in valid_cases:
|
| 99 |
raise HTTPException(
|
|
@@ -103,7 +108,6 @@ def create_segment_job(
|
|
| 103 |
|
| 104 |
# Use full UUID hex for uniqueness (no truncation)
|
| 105 |
job_id = uuid.uuid4().hex
|
| 106 |
-
store = get_job_store()
|
| 107 |
backend_url = get_backend_base_url(request)
|
| 108 |
|
| 109 |
# Create job record
|
|
@@ -127,6 +131,10 @@ def create_segment_job(
|
|
| 127 |
message=f"Segmentation job queued for {body.case_id}",
|
| 128 |
)
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
except Exception:
|
| 131 |
logger.exception("Failed to create segmentation job")
|
| 132 |
raise HTTPException(status_code=500, detail="Failed to create segmentation job") from None
|
|
@@ -215,7 +223,7 @@ def run_segmentation_job(
|
|
| 215 |
store.update_progress(job_id, 10, "Loading case data...")
|
| 216 |
|
| 217 |
# Set up output directory
|
| 218 |
-
output_dir =
|
| 219 |
|
| 220 |
store.update_progress(job_id, 20, "Staging files for DeepISLES...")
|
| 221 |
|
|
@@ -263,10 +271,10 @@ def run_segmentation_job(
|
|
| 263 |
# Mark as completed
|
| 264 |
store.complete_job(job_id, result_data)
|
| 265 |
|
|
|
|
| 266 |
logger.info(
|
| 267 |
-
"Job %s completed:
|
| 268 |
job_id,
|
| 269 |
-
case_id,
|
| 270 |
result.dice_score or 0,
|
| 271 |
result.elapsed_seconds,
|
| 272 |
)
|
|
|
|
| 12 |
|
| 13 |
import os
|
| 14 |
import uuid
|
|
|
|
| 15 |
|
| 16 |
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
| 17 |
|
| 18 |
+
from stroke_deepisles_demo.api.config import MAX_CONCURRENT_JOBS, RESULTS_DIR
|
| 19 |
from stroke_deepisles_demo.api.job_store import JobStatus, get_job_store
|
| 20 |
from stroke_deepisles_demo.api.schemas import (
|
| 21 |
CasesResponse,
|
|
|
|
| 33 |
|
| 34 |
router = APIRouter()
|
| 35 |
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
def get_backend_base_url(request: Request) -> str:
|
| 38 |
"""Get the backend's public URL for building absolute file URLs.
|
|
|
|
| 90 |
- Returning immediately avoids timeout errors
|
| 91 |
"""
|
| 92 |
try:
|
| 93 |
+
# Concurrency limit to prevent GPU memory exhaustion (BUG-006 fix)
|
| 94 |
+
store = get_job_store()
|
| 95 |
+
if store.get_active_job_count() >= MAX_CONCURRENT_JOBS:
|
| 96 |
+
raise HTTPException(
|
| 97 |
+
status_code=503,
|
| 98 |
+
detail="Server busy: too many active jobs. Please try again later.",
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
# Validate case_id exists before creating job
|
| 102 |
valid_cases = list_case_ids()
|
| 103 |
if body.case_id not in valid_cases:
|
| 104 |
raise HTTPException(
|
|
|
|
| 108 |
|
| 109 |
# Use full UUID hex for uniqueness (no truncation)
|
| 110 |
job_id = uuid.uuid4().hex
|
|
|
|
| 111 |
backend_url = get_backend_base_url(request)
|
| 112 |
|
| 113 |
# Create job record
|
|
|
|
| 131 |
message=f"Segmentation job queued for {body.case_id}",
|
| 132 |
)
|
| 133 |
|
| 134 |
+
except HTTPException:
|
| 135 |
+
# Re-raise HTTP exceptions (400, 404, 503, etc.) as-is
|
| 136 |
+
# Without this, they'd be caught by `except Exception` and converted to 500
|
| 137 |
+
raise
|
| 138 |
except Exception:
|
| 139 |
logger.exception("Failed to create segmentation job")
|
| 140 |
raise HTTPException(status_code=500, detail="Failed to create segmentation job") from None
|
|
|
|
| 223 |
store.update_progress(job_id, 10, "Loading case data...")
|
| 224 |
|
| 225 |
# Set up output directory
|
| 226 |
+
output_dir = RESULTS_DIR / job_id
|
| 227 |
|
| 228 |
store.update_progress(job_id, 20, "Staging files for DeepISLES...")
|
| 229 |
|
|
|
|
| 271 |
# Mark as completed
|
| 272 |
store.complete_job(job_id, result_data)
|
| 273 |
|
| 274 |
+
# Note: Don't log case_id as it may be sensitive (medical domain)
|
| 275 |
logger.info(
|
| 276 |
+
"Job %s completed: dice=%.3f, time=%.1fs",
|
| 277 |
job_id,
|
|
|
|
| 278 |
result.dice_score or 0,
|
| 279 |
result.elapsed_seconds,
|
| 280 |
)
|
src/stroke_deepisles_demo/data/__init__.py
CHANGED
|
@@ -21,14 +21,21 @@ def get_case(case_id: str | int) -> CaseFiles:
|
|
| 21 |
"""
|
| 22 |
Load a single case by ID or index.
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
Returns:
|
| 25 |
CaseFiles dictionary
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
|
| 31 |
def list_case_ids() -> list[str]:
|
| 32 |
-
"""List all available case IDs.
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"""
|
| 22 |
Load a single case by ID or index.
|
| 23 |
|
| 24 |
+
Uses context manager to ensure HuggingFace temp files are cleaned up.
|
| 25 |
+
This prevents unbounded disk usage from accumulating temp NIfTI files.
|
| 26 |
+
|
| 27 |
Returns:
|
| 28 |
CaseFiles dictionary
|
| 29 |
"""
|
| 30 |
+
with load_isles_dataset() as dataset:
|
| 31 |
+
return dataset.get_case(case_id)
|
| 32 |
|
| 33 |
|
| 34 |
def list_case_ids() -> list[str]:
|
| 35 |
+
"""List all available case IDs.
|
| 36 |
+
|
| 37 |
+
Uses context manager to ensure HuggingFace temp files are cleaned up.
|
| 38 |
+
This prevents unbounded disk usage from accumulating temp NIfTI files.
|
| 39 |
+
"""
|
| 40 |
+
with load_isles_dataset() as dataset:
|
| 41 |
+
return dataset.list_case_ids()
|