raylim Claude Sonnet 4.5 committed on
Commit
52f8cb9
·
unverified ·
1 Parent(s): 751062d

Fix circular import by moving get_data_directory to separate module

Browse files

Moved the get_data_directory() function from gradio_app.py to a new
data_directory.py module to avoid circular imports. The circular import
occurred because gradio_app.py imports from ui/utils.py, which in turn
imported get_data_directory back from gradio_app.py.

Changes:
- Create src/mosaic/data_directory.py with get_data_directory() and
set_data_directory() functions
- Update gradio_app.py to import and use set_data_directory()
- Update all other modules (aeon.py, data.py, utils.py, analysis.py) to
import from data_directory module instead of gradio_app

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

src/mosaic/analysis.py CHANGED
@@ -61,7 +61,7 @@ from mussel.utils.segment import draw_slide_mask
61
  from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
62
  from loguru import logger
63
  from mosaic.inference import run_aeon, run_paladin
64
- from mosaic.gradio_app import get_data_directory
65
 
66
  # Log hardware detection at module load
67
  logger.info(f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}")
 
61
  from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
62
  from loguru import logger
63
  from mosaic.inference import run_aeon, run_paladin
64
+ from mosaic.data_directory import get_data_directory
65
 
66
  # Log hardware detection at module load
67
  logger.info(f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}")
src/mosaic/data_directory.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data directory management for model files.
2
+
3
+ This module provides a centralized function to locate the directory containing
4
+ model data files, checking environment variables and falling back to local paths.
5
+ """
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+
11
+ # Global variable to store the model data directory path
12
+ _MODEL_DATA_DIR = None
13
+
14
+
15
+ def get_data_directory():
16
+ """Get the directory containing model data files.
17
+
18
+ Returns the HuggingFace cache directory path for the model repository,
19
+ or falls back to local 'data/' directory if not yet downloaded.
20
+
21
+ Returns:
22
+ Path: Path to the model data directory
23
+ """
24
+ global _MODEL_DATA_DIR
25
+ if _MODEL_DATA_DIR is not None:
26
+ return _MODEL_DATA_DIR
27
+
28
+ # Check if environment variable is set
29
+ if "MOSAIC_DATA_DIR" in os.environ:
30
+ _MODEL_DATA_DIR = Path(os.environ["MOSAIC_DATA_DIR"])
31
+ return _MODEL_DATA_DIR
32
+
33
+ # Check if local data/ directory exists (for development/backward compat)
34
+ local_data = Path("data")
35
+ if local_data.exists() and (local_data / "paladin_model_map.csv").exists():
36
+ _MODEL_DATA_DIR = local_data
37
+ return _MODEL_DATA_DIR
38
+
39
+ # Fall back to repo root data/ directory
40
+ _MODEL_DATA_DIR = local_data
41
+ return _MODEL_DATA_DIR
42
+
43
+
44
+ def set_data_directory(path):
45
+ """Set the data directory path.
46
+
47
+ Args:
48
+ path: Path to the data directory
49
+ """
50
+ global _MODEL_DATA_DIR
51
+ _MODEL_DATA_DIR = Path(path)
52
+ os.environ["MOSAIC_DATA_DIR"] = str(_MODEL_DATA_DIR)
src/mosaic/gradio_app.py CHANGED
@@ -12,8 +12,8 @@ import pandas as pd
12
  from pathlib import Path
13
  from huggingface_hub import snapshot_download
14
  from loguru import logger
15
- import os
16
 
 
17
  from mosaic.ui import launch_gradio
18
  from mosaic.ui.app import set_cancer_subtype_maps
19
  from mosaic.ui.utils import (
@@ -26,38 +26,6 @@ from mosaic.ui.utils import (
26
  )
27
  from mosaic.analysis import analyze_slide
28
 
29
- # Global variable to store the model data directory path
30
- _MODEL_DATA_DIR = None
31
-
32
-
33
- def get_data_directory():
34
- """Get the directory containing model data files.
35
-
36
- Returns the HuggingFace cache directory path for the model repository,
37
- or falls back to local 'data/' directory if not yet downloaded.
38
-
39
- Returns:
40
- Path: Path to the model data directory
41
- """
42
- global _MODEL_DATA_DIR
43
- if _MODEL_DATA_DIR is not None:
44
- return _MODEL_DATA_DIR
45
-
46
- # Check if environment variable is set
47
- if "MOSAIC_DATA_DIR" in os.environ:
48
- _MODEL_DATA_DIR = Path(os.environ["MOSAIC_DATA_DIR"])
49
- return _MODEL_DATA_DIR
50
-
51
- # Check if local data/ directory exists (for development/backward compat)
52
- local_data = Path("data")
53
- if local_data.exists() and (local_data / "paladin_model_map.csv").exists():
54
- _MODEL_DATA_DIR = local_data
55
- return _MODEL_DATA_DIR
56
-
57
- # Fall back to repo root data/ directory
58
- _MODEL_DATA_DIR = local_data
59
- return _MODEL_DATA_DIR
60
-
61
 
62
  def download_and_process_models():
63
  """Download models from HuggingFace and initialize cancer subtype mappings.
@@ -72,8 +40,6 @@ def download_and_process_models():
72
  - reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
73
  - cancer_subtypes: List of all supported cancer subtype codes
74
  """
75
- global _MODEL_DATA_DIR
76
-
77
  # Download to HF cache directory (not local_dir)
78
  # This returns the path to the cached snapshot
79
  logger.info("Downloading models from HuggingFace Hub to cache directory...")
@@ -81,14 +47,13 @@ def download_and_process_models():
81
  repo_id="PDM-Group/paladin-aeon-models",
82
  # No local_dir - use HF cache
83
  )
84
- _MODEL_DATA_DIR = Path(cache_dir)
85
- logger.info(f"Models downloaded to: {_MODEL_DATA_DIR}")
86
 
87
- # Also set environment variable for other modules to use
88
- os.environ["MOSAIC_DATA_DIR"] = str(_MODEL_DATA_DIR)
89
 
90
  model_map = pd.read_csv(
91
- _MODEL_DATA_DIR / "paladin_model_map.csv",
92
  )
93
  cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
94
  cancer_subtype_name_map = {"Unknown": "UNK"}
 
12
  from pathlib import Path
13
  from huggingface_hub import snapshot_download
14
  from loguru import logger
 
15
 
16
+ from mosaic.data_directory import set_data_directory
17
  from mosaic.ui import launch_gradio
18
  from mosaic.ui.app import set_cancer_subtype_maps
19
  from mosaic.ui.utils import (
 
26
  )
27
  from mosaic.analysis import analyze_slide
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def download_and_process_models():
31
  """Download models from HuggingFace and initialize cancer subtype mappings.
 
40
  - reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
41
  - cancer_subtypes: List of all supported cancer subtype codes
42
  """
 
 
43
  # Download to HF cache directory (not local_dir)
44
  # This returns the path to the cached snapshot
45
  logger.info("Downloading models from HuggingFace Hub to cache directory...")
 
47
  repo_id="PDM-Group/paladin-aeon-models",
48
  # No local_dir - use HF cache
49
  )
50
+ logger.info(f"Models downloaded to: {cache_dir}")
 
51
 
52
+ # Set the data directory for other modules to use
53
+ set_data_directory(cache_dir)
54
 
55
  model_map = pd.read_csv(
56
+ Path(cache_dir) / "paladin_model_map.csv",
57
  )
58
  cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
59
  cancer_subtype_name_map = {"Unknown": "UNK"}
src/mosaic/inference/aeon.py CHANGED
@@ -22,7 +22,7 @@ from mosaic.inference.data import (
22
  )
23
 
24
  from loguru import logger
25
- from mosaic.gradio_app import get_data_directory
26
 
27
  # Cancer types excluded from prediction (too broad or ambiguous)
28
  # These are used to mask out predictions for overly general cancer types
 
22
  )
23
 
24
  from loguru import logger
25
+ from mosaic.data_directory import get_data_directory
26
 
27
  # Cancer types excluded from prediction (too broad or ambiguous)
28
  # These are used to mask out predictions for overly general cancer types
src/mosaic/inference/data.py CHANGED
@@ -13,7 +13,7 @@ import torch
13
  from torch.utils.data import Dataset
14
  import numpy as np
15
 
16
- from mosaic.gradio_app import get_data_directory
17
 
18
  CANCER_TYPE_TO_INT_MAP = {
19
  "AASTR": 0,
 
13
  from torch.utils.data import Dataset
14
  import numpy as np
15
 
16
+ from mosaic.data_directory import get_data_directory
17
 
18
  CANCER_TYPE_TO_INT_MAP = {
19
  "AASTR": 0,
src/mosaic/ui/utils.py CHANGED
@@ -13,7 +13,7 @@ import pandas as pd
13
  import gradio as gr
14
  import requests
15
 
16
- from mosaic.gradio_app import get_data_directory
17
 
18
  # This path should be outside your project directory if running locally
19
  TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"
 
13
  import gradio as gr
14
  import requests
15
 
16
+ from mosaic.data_directory import get_data_directory
17
 
18
  # This path should be outside your project directory if running locally
19
  TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"