VibecoderMcSwaggins committed
Commit 1db3313 · unverified · 2 parents: 363ba14 4a455a4

Merge pull request #12 from The-Obstacle-Is-The-Way/fix/pipeline-resource-leak

src/stroke_deepisles_demo/pipeline.py CHANGED
@@ -25,11 +25,15 @@ logger = get_logger(__name__)
 
 @dataclass(frozen=True)
 class PipelineResult:
-    """Complete result of running the pipeline on a case."""
+    """Complete result of running the pipeline on a case.
+
+    All file paths in this result point to valid, accessible files in results_dir.
+    Callers are responsible for cleaning up results_dir when done (if desired).
+    """
 
     case_id: str
-    input_files: CaseFiles
-    staged_dir: Path
+    input_files: CaseFiles  # Copied to results_dir; always valid paths
+    results_dir: Path  # Directory containing all result files (for cleanup)
     prediction_mask: Path
     ground_truth: Path | None
    dice_score: float | None  # None if ground truth unavailable or not computed
@@ -81,38 +85,64 @@ def run_pipeline_on_case(
 
     start_time = time.time()
 
-    # 1. Load Dataset
-    dataset = load_isles_dataset()  # Uses default local path for now
-
-    # Resolve ID if integer
-    if isinstance(case_id, int):
-        all_ids = dataset.list_case_ids()
-        if case_id < 0 or case_id >= len(all_ids):
-            raise IndexError(f"Case index {case_id} out of range (0-{len(all_ids) - 1})")
-        resolved_case_id = all_ids[case_id]
-    else:
-        resolved_case_id = case_id
-
-    # Get case files
-    case_files = dataset.get_case(resolved_case_id)
-
-    # 2. Stage Files
-    # Use a temp dir for staging if output_dir not provided, or a subdir of output_dir
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-        staging_root = output_dir / "staging" / resolved_case_id
-        results_dir = output_dir / resolved_case_id
-    else:
-        # If no output dir, we create a temp dir that persists (unless cleanup requested)
-        # But wait, the user wants paths. If we use tempfile.TemporaryDirectory context,
-        # it disappears. We should use mkdtemp or let stage_case handle it.
-        # Let's use a temp dir for staging.
-        base_temp = Path(tempfile.mkdtemp(prefix="deepisles_pipeline_"))
-        staging_root = base_temp / "staging"
-        results_dir = base_temp / "results"
-
-    staged = stage_case_for_deepisles(case_files, staging_root)
+    # Use context manager to ensure HuggingFace temp files are cleaned up
+    # This prevents unbounded disk usage from accumulating temp NIfTI files
+    with load_isles_dataset() as dataset:
+        # Resolve ID if integer
+        if isinstance(case_id, int):
+            all_ids = dataset.list_case_ids()
+            if case_id < 0 or case_id >= len(all_ids):
+                raise IndexError(f"Case index {case_id} out of range (0-{len(all_ids) - 1})")
+            resolved_case_id = all_ids[case_id]
+        else:
+            resolved_case_id = case_id
+
+        # Set up output directories (now that we have resolved_case_id)
+        if output_dir:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+            staging_root = output_dir / "staging" / resolved_case_id
+            results_dir = output_dir / resolved_case_id
+        else:
+            base_temp = Path(tempfile.mkdtemp(prefix="deepisles_pipeline_"))
+            staging_root = base_temp / "staging"
+            results_dir = base_temp / "results"
+
+        # Get case files
+        case_files = dataset.get_case(resolved_case_id)
+
+        # Stage files (copies DWI/ADC to staging directory)
+        staged = stage_case_for_deepisles(case_files, staging_root)
+
+        # Copy input files to results_dir before dataset cleanup
+        # (HuggingFace mode stores files in temp dirs that get cleaned up)
+        # This ensures all paths in PipelineResult remain valid after function returns
+        results_dir.mkdir(parents=True, exist_ok=True)
+
+        # Copy DWI (required for UI visualization)
+        dwi_dest = results_dir / f"{resolved_case_id}_dwi.nii.gz"
+        shutil.copy2(case_files["dwi"], dwi_dest)
+
+        # Copy ADC
+        adc_dest = results_dir / f"{resolved_case_id}_adc.nii.gz"
+        shutil.copy2(case_files["adc"], adc_dest)
+
+        # Copy ground truth if available
+        ground_truth: Path | None = None
+        original_ground_truth = case_files.get("ground_truth")
+        if original_ground_truth and original_ground_truth.exists():
+            ground_truth = results_dir / f"{resolved_case_id}_ground_truth.nii.gz"
+            shutil.copy2(original_ground_truth, ground_truth)
+
+        # Build input_files with copied paths (always valid after function returns)
+        preserved_input_files: CaseFiles = {
+            "dwi": dwi_dest,
+            "adc": adc_dest,
+        }
+        if ground_truth:
+            preserved_input_files["ground_truth"] = ground_truth
+
+    # Dataset temp files cleaned up here (context manager __exit__)
 
     # 3. Run Inference
     inference_result = run_deepisles_on_folder(
@@ -122,10 +152,8 @@ def run_pipeline_on_case(
         gpu=gpu,
     )
 
-    # 4. Compute Metrics
+    # 4. Compute Metrics (using copied ground truth)
     dice_score: float | None = None
-    ground_truth = case_files.get("ground_truth")
-
     if compute_dice and ground_truth and ground_truth.exists():
         try:
             dice_score = metrics.compute_dice(inference_result.prediction_path, ground_truth)
@@ -140,8 +168,8 @@ def run_pipeline_on_case(
 
     return PipelineResult(
         case_id=resolved_case_id,
-        input_files=case_files,
-        staged_dir=staged.input_dir,
+        input_files=preserved_input_files,
+        results_dir=results_dir,
         prediction_mask=inference_result.prediction_path,
        ground_truth=ground_truth,
        dice_score=dice_score,
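
The with-statement above requires load_isles_dataset() to return an object implementing the context manager protocol. A minimal sketch of what that side of the contract could look like — the class name IslesDataset and the _temp_dir attribute are illustrative assumptions, not the repository's actual code:

import shutil
import tempfile
from pathlib import Path


class IslesDataset:
    """Hypothetical loader whose temp files are removed on context exit."""

    def __init__(self) -> None:
        # HuggingFace mode extracts NIfTI files into a scratch directory
        self._temp_dir = Path(tempfile.mkdtemp(prefix="isles_data_"))

    def __enter__(self) -> "IslesDataset":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # ignore_errors so cleanup never masks an exception from the with-block
        shutil.rmtree(self._temp_dir, ignore_errors=True)

This is why the pipeline copies inputs into results_dir before the with-block closes: once __exit__ runs, any path still pointing into the loader's scratch directory would dangle.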
src/stroke_deepisles_demo/ui/app.py CHANGED
@@ -2,7 +2,8 @@
 
 from __future__ import annotations
 
-from typing import Any
+import shutil
+from typing import TYPE_CHECKING, Any
 
 import gradio as gr
 from matplotlib.figure import Figure  # noqa: TC002
@@ -20,8 +21,14 @@ from stroke_deepisles_demo.ui.viewer import (
     render_slice_comparison,
 )
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
 logger = get_logger(__name__)
 
+# Shared output directory for UI results (cleaned up between runs to prevent disk accumulation)
+_previous_results_dir: Path | None = None
+
 
 def run_segmentation(
     case_id: str, fast_mode: bool, show_ground_truth: bool
@@ -47,6 +54,13 @@ def run_segmentation(
    )
 
     try:
+        global _previous_results_dir
+
+        # Clean up previous results to prevent disk accumulation on HF Spaces
+        if _previous_results_dir and _previous_results_dir.exists():
+            shutil.rmtree(_previous_results_dir, ignore_errors=True)
+            logger.debug("Cleaned up previous results: %s", _previous_results_dir)
+
         logger.info("Running segmentation for %s", case_id)
         result = run_pipeline_on_case(
             case_id,
@@ -55,6 +69,9 @@
             cleanup_staging=True,
         )
 
+        # Track results_dir for cleanup on next run
+        _previous_results_dir = result.results_dir
+
         # 1. NiiVue Visualization
        # We need data URLs for the browser
        # Note: This reads the file content into memory (base64)
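
The module-global _previous_results_dir implements a "delete the last run's output before starting the next" policy. A standalone sketch of the pattern, assuming one Gradio request at a time (no locking), which matches a single-user HF Spaces demo; cleanup_previous_run is a hypothetical helper name — the app inlines this logic inside run_segmentation:

import shutil
from pathlib import Path

_previous_results_dir: Path | None = None


def cleanup_previous_run() -> None:
    """Remove the previous run's output directory, if any."""
    global _previous_results_dir
    if _previous_results_dir and _previous_results_dir.exists():
        shutil.rmtree(_previous_results_dir, ignore_errors=True)
    _previous_results_dir = None

The design keeps at most one run's results on disk: the current results must outlive run_segmentation (the browser still needs the files), so they can only be reclaimed at the start of the next run.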
tests/test_cli.py CHANGED
@@ -26,7 +26,7 @@ class TestCli:
         result = PipelineResult(
             case_id="sub-001",
             input_files=MagicMock(),
-            staged_dir=MagicMock(),
+            results_dir=MagicMock(),
             prediction_mask=MagicMock(),
             ground_truth=None,
             dice_score=None,
@@ -50,7 +50,7 @@ class TestCli:
         result = PipelineResult(
             case_id="sub-001",
             input_files=MagicMock(),
-            staged_dir=MagicMock(),
+            results_dir=MagicMock(),
             prediction_mask=MagicMock(),
            ground_truth=None,
            dice_score=None,
tests/test_pipeline.py CHANGED
@@ -35,21 +35,23 @@ class TestRunPipelineOnCase:
         # Configure mocks
         mock_dataset = MagicMock()
 
-        # Mock paths that "exist"
-        dwi_path = MagicMock(spec=Path)
-        dwi_path.exists.return_value = True
-        adc_path = MagicMock(spec=Path)
-        adc_path.exists.return_value = True
-        gt_path = MagicMock(spec=Path)
-        gt_path.exists.return_value = True
+        # Create real temp files (pipeline copies these to results_dir)
+        dwi_file = temp_dir / "dwi_mock.nii.gz"
+        dwi_file.write_bytes(b"fake dwi nifti")
+        adc_file = temp_dir / "adc_mock.nii.gz"
+        adc_file.write_bytes(b"fake adc nifti")
+        gt_file = temp_dir / "gt_mock.nii.gz"
+        gt_file.write_bytes(b"fake gt nifti")
 
         mock_dataset.get_case.return_value = CaseFiles(
-            dwi=dwi_path,
-            adc=adc_path,
-            ground_truth=gt_path,
+            dwi=dwi_file,
+            adc=adc_file,
+            ground_truth=gt_file,
             # flair omitted
         )
-        mock_load.return_value = mock_dataset
+        # Support context manager protocol: with load_isles_dataset() as dataset:
+        mock_load.return_value.__enter__ = MagicMock(return_value=mock_dataset)
+        mock_load.return_value.__exit__ = MagicMock(return_value=None)
 
         mock_stage.return_value = MagicMock(
             input_dir=temp_dir / "staged",
@@ -142,15 +144,18 @@ class TestRunPipelineOnCase:
     def test_handles_missing_ground_truth(
         self,
         mock_dependencies: dict[str, MagicMock],
-        temp_dir: Path,  # noqa: ARG002
+        temp_dir: Path,
     ) -> None:
         """Handles cases without ground truth gracefully."""
-        # Modify mock to return no ground truth
-        dwi = MagicMock(spec=Path)
-        adc = MagicMock(spec=Path)
+        # Create real files for DWI/ADC (pipeline copies these)
+        dwi_file = temp_dir / "dwi_no_gt.nii.gz"
+        dwi_file.write_bytes(b"fake dwi")
+        adc_file = temp_dir / "adc_no_gt.nii.gz"
+        adc_file.write_bytes(b"fake adc")
+
         mock_dependencies["dataset"].get_case.return_value = CaseFiles(
-            dwi=dwi,
-            adc=adc,
+            dwi=dwi_file,
+            adc=adc_file,
             # ground_truth omitted
         )
 
@@ -231,7 +236,7 @@ class TestRunPipelineOnBatch:
             PipelineResult(
                 case_id="sub-001",
                 input_files=MagicMock(),
-                staged_dir=MagicMock(),
+                results_dir=MagicMock(),
                 prediction_mask=MagicMock(),
                 ground_truth=None,
                 dice_score=0.8,
@@ -240,7 +245,7 @@
             PipelineResult(
                 case_id="sub-002",
                 input_files=MagicMock(),
-                staged_dir=MagicMock(),
+                results_dir=MagicMock(),
                 prediction_mask=MagicMock(),
                 ground_truth=None,
                 dice_score=0.9,
@@ -261,7 +266,7 @@
         mock_run.return_value = PipelineResult(
             case_id="sub-001",
             input_files=MagicMock(),
-            staged_dir=MagicMock(),
+            results_dir=MagicMock(),
            prediction_mask=MagicMock(),
            ground_truth=None,
            dice_score=0.8,
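
A note on the __enter__/__exit__ assignments above: a bare MagicMock already auto-creates magic methods, but its __enter__ would return a fresh child mock, not mock_dataset. Assigning __enter__ explicitly makes the with-block bind the intended object. A self-contained illustration (mock_load stands in for the patched load_isles_dataset):

from unittest.mock import MagicMock

mock_load = MagicMock()
mock_dataset = MagicMock()
mock_load.return_value.__enter__ = MagicMock(return_value=mock_dataset)
mock_load.return_value.__exit__ = MagicMock(return_value=None)

with mock_load() as dataset:
    assert dataset is mock_dataset  # the with-block sees the configured mock
# __exit__ returning None means exceptions inside the block still propagate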
tests/test_pipeline_cleanup.py CHANGED
@@ -4,8 +4,13 @@ from unittest.mock import MagicMock, patch
 from stroke_deepisles_demo.pipeline import run_pipeline_on_case
 
 
-def test_pipeline_cleanup_default() -> None:
+def test_pipeline_cleanup_default(temp_dir: Path) -> None:
     """Test that pipeline cleans up staging directory by default."""
+    # Create real files (pipeline now copies input files to results_dir)
+    dwi_file = temp_dir / "dwi.nii.gz"
+    dwi_file.write_bytes(b"fake dwi")
+    adc_file = temp_dir / "adc.nii.gz"
+    adc_file.write_bytes(b"fake adc")
 
     # Mock everything to avoid running actual heavy inference
     with (
@@ -13,13 +18,17 @@ def test_pipeline_cleanup_default() -> None:
         patch("stroke_deepisles_demo.pipeline.stage_case_for_deepisles") as mock_stage,
         patch("stroke_deepisles_demo.pipeline.run_deepisles_on_folder") as mock_run,
         patch("stroke_deepisles_demo.pipeline.metrics.compute_dice"),
-        patch("shutil.rmtree") as mock_rmtree,
+        patch("stroke_deepisles_demo.pipeline.shutil.rmtree") as mock_rmtree,
     ):
         # Setup mocks
         mock_dataset = MagicMock()
-        mock_load.return_value = mock_dataset
         mock_dataset.list_case_ids.return_value = ["case1"]
-        mock_dataset.get_case.return_value = {"dwi": Path("dwi.nii.gz")}
+        # Return dict with real files (no ground_truth)
+        mock_dataset.get_case.return_value = {"dwi": dwi_file, "adc": adc_file}
+
+        # Support context manager protocol: with load_isles_dataset() as dataset:
+        mock_load.return_value.__enter__ = MagicMock(return_value=mock_dataset)
+        mock_load.return_value.__exit__ = MagicMock(return_value=None)
 
         mock_staged = MagicMock()
         mock_staged.input_dir = Path("/tmp/mock_staging")
@@ -32,7 +41,7 @@ def test_pipeline_cleanup_default() -> None:
         # Run pipeline with defaults (cleanup_staging=True is the default)
         run_pipeline_on_case("case1")
 
-        # Verify that rmtree was called
+        # Verify that rmtree was called (for staging cleanup)
        assert mock_rmtree.called
 
        # Get the path passed to stage_case_for_deepisles
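
Switching the patch target from "shutil.rmtree" to "stroke_deepisles_demo.pipeline.shutil.rmtree" follows the patch-where-it-is-looked-up rule. With a plain import shutil in pipeline.py both targets resolve to the same attribute on the shared shutil module object, but the qualified form documents the seam under test and fails fast if pipeline.py ever stops importing shutil. A self-contained demo of the mechanism:

import shutil
from unittest.mock import patch

with patch("shutil.rmtree") as mock_rmtree:
    shutil.rmtree("/tmp/does-not-matter")  # intercepted; nothing is deleted
    mock_rmtree.assert_called_once_with("/tmp/does-not-matter")

The distinction would matter more if pipeline.py used "from shutil import rmtree" — then only a patch on the pipeline module's own rmtree name would take effect.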
tests/ui/test_app.py CHANGED
@@ -51,7 +51,7 @@ def test_run_segmentation_logic() -> None:
    mock_result = PipelineResult(
        case_id="sub-001",
        input_files={"dwi": MagicMock(), "adc": MagicMock()},
-        staged_dir=MagicMock(),
+        results_dir=MagicMock(),
        prediction_mask=MagicMock(),
        ground_truth=MagicMock(),
        dice_score=0.85,