VibecoderMcSwaggins committed on
Commit
a2223b1
·
unverified ·
1 Parent(s): 297e6c4

fix(ui): lazy dataset loading to prevent HF Spaces timeout (#14)

Browse files

* fix(ui): lazy load dataset to prevent HF Spaces startup timeout (P0)

Root Cause:
- `create_case_selector()` called `list_case_ids()` synchronously
- This triggered the HuggingFace dataset download (~6-8 min for 149 cases)
- App failed to bind to port 7860 before HF Spaces health check timeout
- Container restarted repeatedly in a loop

Solution:
- `create_case_selector()` now returns an empty dropdown immediately
- Added `initialize_case_selector()` in app.py for lazy loading
- `demo.load()` triggers data loading AFTER UI renders
- App binds to port 7860 instantly; the user sees "Initializing dataset... please wait." in the dropdown

Additional Fixes:
- Set `HF_HOME=/app/cache` in Dockerfile for writable cache directory
- Updated test to reflect lazy loading (no mock needed for create_app)
- Fixed lint issues (B008, W293)

Behavior Change:
- Before: Blank screen for 6-8 min, then restart loop
- After: UI loads instantly, dropdown shows "Initializing..." then populates

* fix: update dataset references from YongchengYAO to hugging-science

Replace all user-facing references to the old ISLES24-MR-Lite dataset
with the proper hugging-science/isles24-stroke dataset:
- UI markdown link and text
- Config default value
- .env.example
- Test assertion

.env.example CHANGED
@@ -3,7 +3,7 @@ STROKE_DEMO_LOG_LEVEL=INFO
3
  STROKE_DEMO_LOG_FORMAT=simple
4
 
5
  # HuggingFace
6
- STROKE_DEMO_HF_DATASET_ID=YongchengYAO/ISLES24-MR-Lite
7
  # STROKE_DEMO_HF_TOKEN=hf_...
8
 
9
  # DeepISLES
 
3
  STROKE_DEMO_LOG_FORMAT=simple
4
 
5
  # HuggingFace
6
+ STROKE_DEMO_HF_DATASET_ID=hugging-science/isles24-stroke
7
  # STROKE_DEMO_HF_TOKEN=hf_...
8
 
9
  # DeepISLES
Dockerfile CHANGED
@@ -44,6 +44,8 @@ RUN pip install --no-cache-dir --no-deps -e .
44
  # This allows the app to detect runtime environment and use direct invocation
45
  ENV HF_SPACES=1
46
  ENV DEEPISLES_DIRECT_INVOCATION=1
 
 
47
 
48
  # Create directories for data with proper permissions
49
  RUN mkdir -p /app/data /app/results /app/cache && \
 
44
  # This allows the app to detect runtime environment and use direct invocation
45
  ENV HF_SPACES=1
46
  ENV DEEPISLES_DIRECT_INVOCATION=1
47
+ # Ensure HuggingFace cache uses our writable directory
48
+ ENV HF_HOME=/app/cache
49
 
50
  # Create directories for data with proper permissions
51
  RUN mkdir -p /app/data /app/results /app/cache && \
src/stroke_deepisles_demo/core/config.py CHANGED
@@ -76,7 +76,7 @@ class Settings(BaseSettings):
76
  log_format: Literal["simple", "detailed", "json"] = "simple"
77
 
78
  # HuggingFace
79
- hf_dataset_id: str = "YongchengYAO/ISLES24-MR-Lite"
80
  hf_cache_dir: Path | None = None
81
  hf_token: str | None = Field(default=None, repr=False) # Hidden from logs
82
 
 
76
  log_format: Literal["simple", "detailed", "json"] = "simple"
77
 
78
  # HuggingFace
79
+ hf_dataset_id: str = "hugging-science/isles24-stroke"
80
  hf_cache_dir: Path | None = None
81
  hf_token: str | None = Field(default=None, repr=False) # Hidden from logs
82
 
src/stroke_deepisles_demo/ui/app.py CHANGED
@@ -9,6 +9,7 @@ import gradio as gr
9
  from matplotlib.figure import Figure # noqa: TC002
10
 
11
  from stroke_deepisles_demo.core.logging import get_logger
 
12
  from stroke_deepisles_demo.pipeline import run_pipeline_on_case
13
  from stroke_deepisles_demo.ui.components import (
14
  create_case_selector,
@@ -30,6 +31,31 @@ logger = get_logger(__name__)
30
  _previous_results_dir: Path | None = None
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def run_segmentation(
34
  case_id: str, fast_mode: bool, show_ground_truth: bool
35
  ) -> tuple[str, Figure | None, dict[str, Any], str | None, str]:
@@ -139,7 +165,7 @@ def create_app() -> gr.Blocks:
139
 
140
  This demo runs [DeepISLES](https://github.com/ezequieldlrosa/DeepIsles)
141
  stroke segmentation on cases from
142
- [ISLES24-MR-Lite](https://huggingface.co/datasets/YongchengYAO/ISLES24-MR-Lite).
143
 
144
  **Model:** SEALS (ISLES'22 winner) - Fast, accurate ischemic stroke lesion segmentation.
145
 
@@ -175,6 +201,9 @@ def create_app() -> gr.Blocks:
175
  ],
176
  )
177
 
 
 
 
178
  return demo # type: ignore[no-any-return]
179
 
180
 
 
9
  from matplotlib.figure import Figure # noqa: TC002
10
 
11
  from stroke_deepisles_demo.core.logging import get_logger
12
+ from stroke_deepisles_demo.data import list_case_ids
13
  from stroke_deepisles_demo.pipeline import run_pipeline_on_case
14
  from stroke_deepisles_demo.ui.components import (
15
  create_case_selector,
 
31
  _previous_results_dir: Path | None = None
32
 
33
 
34
+ def initialize_case_selector() -> gr.Dropdown:
35
+ """
36
+ Initialize case selector by loading dataset (lazy load).
37
+
38
+ This prevents the app from hanging during startup while downloading data.
39
+ Called via demo.load() after the UI renders.
40
+ """
41
+ try:
42
+ logger.info("Initializing dataset for case selector...")
43
+ case_ids = list_case_ids()
44
+
45
+ if not case_ids:
46
+ return gr.Dropdown(choices=[], info="No cases found in dataset.")
47
+
48
+ return gr.Dropdown(
49
+ choices=case_ids,
50
+ value=case_ids[0],
51
+ info="Choose a case from isles24-stroke dataset",
52
+ interactive=True,
53
+ )
54
+ except Exception as e:
55
+ logger.exception("Failed to initialize dataset")
56
+ return gr.Dropdown(choices=[], info=f"Error loading data: {e!s}")
57
+
58
+
59
  def run_segmentation(
60
  case_id: str, fast_mode: bool, show_ground_truth: bool
61
  ) -> tuple[str, Figure | None, dict[str, Any], str | None, str]:
 
165
 
166
  This demo runs [DeepISLES](https://github.com/ezequieldlrosa/DeepIsles)
167
  stroke segmentation on cases from
168
+ [isles24-stroke](https://huggingface.co/datasets/hugging-science/isles24-stroke).
169
 
170
  **Model:** SEALS (ISLES'22 winner) - Fast, accurate ischemic stroke lesion segmentation.
171
 
 
201
  ],
202
  )
203
 
204
+ # Trigger data loading after UI renders (prevents startup timeout)
205
+ demo.load(initialize_case_selector, outputs=[case_selector])
206
+
207
  return demo # type: ignore[no-any-return]
208
 
209
 
src/stroke_deepisles_demo/ui/components.py CHANGED
@@ -6,7 +6,6 @@ import gradio as gr
6
 
7
  from stroke_deepisles_demo.core.config import get_settings
8
  from stroke_deepisles_demo.core.logging import get_logger
9
- from stroke_deepisles_demo.data import list_case_ids
10
 
11
  logger = get_logger(__name__)
12
 
@@ -15,32 +14,18 @@ def create_case_selector() -> gr.Dropdown:
15
  """
16
  Create a dropdown for selecting cases.
17
 
 
 
18
  Returns:
19
  Configured gr.Dropdown component
20
-
21
- Raises:
22
- RuntimeError: If case IDs cannot be loaded (no silent fallback)
23
  """
24
- try:
25
- case_ids = list_case_ids()
26
- except FileNotFoundError as e:
27
- # Data directory not found - fail loudly with helpful message
28
- logger.error("Data directory not found: %s", e)
29
- raise RuntimeError("ISLES24 data not found. Please run: uv run stroke-demo download") from e
30
- except Exception as e:
31
- # Unexpected error - fail loudly, don't mask with fake dropdown option
32
- logger.exception("Failed to load case IDs")
33
- raise RuntimeError(f"Failed to load case IDs: {e}") from e
34
-
35
- if not case_ids:
36
- raise RuntimeError("No cases found in dataset. Please verify data directory structure.")
37
-
38
  return gr.Dropdown(
39
- choices=case_ids,
40
- value=case_ids[0],
41
  label="Select Case",
42
- info="Choose a case from ISLES24-MR-Lite",
43
  filterable=True,
 
44
  )
45
 
46
 
 
6
 
7
  from stroke_deepisles_demo.core.config import get_settings
8
  from stroke_deepisles_demo.core.logging import get_logger
 
9
 
10
  logger = get_logger(__name__)
11
 
 
14
  """
15
  Create a dropdown for selecting cases.
16
 
17
+ Initially empty; populated by app load event to prevent blocking startup.
18
+
19
  Returns:
20
  Configured gr.Dropdown component
 
 
 
21
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  return gr.Dropdown(
23
+ choices=[],
24
+ value=None,
25
  label="Select Case",
26
+ info="Initializing dataset... please wait.",
27
  filterable=True,
28
+ interactive=True,
29
  )
30
 
31
 
tests/core/test_config.py CHANGED
@@ -23,7 +23,7 @@ class TestSettings:
23
  """Has sensible defaults."""
24
  settings = Settings()
25
  assert settings.log_level == "INFO"
26
- assert settings.hf_dataset_id == "YongchengYAO/ISLES24-MR-Lite"
27
  assert settings.deepisles_timeout_seconds == 1800
28
  assert settings.results_dir == Path("./results")
29
 
 
23
  """Has sensible defaults."""
24
  settings = Settings()
25
  assert settings.log_level == "INFO"
26
+ assert settings.hf_dataset_id == "hugging-science/isles24-stroke"
27
  assert settings.deepisles_timeout_seconds == 1800
28
  assert settings.results_dir == Path("./results")
29
 
tests/ui/test_app.py CHANGED
@@ -18,13 +18,13 @@ def test_create_app_returns_blocks() -> None:
18
  """create_app returns a gr.Blocks instance."""
19
  import gradio as gr
20
 
21
- # Mock list_case_ids to avoid network call
22
- with patch("stroke_deepisles_demo.ui.components.list_case_ids", return_value=["sub-001"]):
23
- from stroke_deepisles_demo.ui.app import create_app
24
 
25
- app = create_app()
 
 
26
 
27
- assert isinstance(app, gr.Blocks)
28
 
29
 
30
  def test_viewer_module_imports() -> None:
 
18
  """create_app returns a gr.Blocks instance."""
19
  import gradio as gr
20
 
21
+ from stroke_deepisles_demo.ui.app import create_app
 
 
22
 
23
+ # No mock needed - create_case_selector is now lazy (empty dropdown)
24
+ # Data loading happens via demo.load() after UI renders
25
+ app = create_app()
26
 
27
+ assert isinstance(app, gr.Blocks)
28
 
29
 
30
  def test_viewer_module_imports() -> None: