Spaces:
Running on Zero
Running on Zero
Fix circular import by moving get_data_directory to separate module
Browse files
Moved the get_data_directory() function from gradio_app.py to a new
data_directory.py module to avoid circular imports. The circular import
occurred because gradio_app.py imports from ui/utils.py, which was trying
to import back from gradio_app.py.
Changes:
- Create src/mosaic/data_directory.py with get_data_directory() and
set_data_directory() functions
- Update gradio_app.py to import and use set_data_directory()
- Update all other modules (aeon.py, data.py, utils.py, analysis.py) to
import from data_directory module instead of gradio_app
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- src/mosaic/analysis.py +1 -1
- src/mosaic/data_directory.py +52 -0
- src/mosaic/gradio_app.py +5 -40
- src/mosaic/inference/aeon.py +1 -1
- src/mosaic/inference/data.py +1 -1
- src/mosaic/ui/utils.py +1 -1
src/mosaic/analysis.py
CHANGED
|
@@ -61,7 +61,7 @@ from mussel.utils.segment import draw_slide_mask
|
|
| 61 |
from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
|
| 62 |
from loguru import logger
|
| 63 |
from mosaic.inference import run_aeon, run_paladin
|
| 64 |
-
from mosaic.gradio_app import get_data_directory
|
| 65 |
|
| 66 |
# Log hardware detection at module load
|
| 67 |
logger.info(f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}")
|
|
|
|
| 61 |
from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
|
| 62 |
from loguru import logger
|
| 63 |
from mosaic.inference import run_aeon, run_paladin
|
| 64 |
+
from mosaic.data_directory import get_data_directory
|
| 65 |
|
| 66 |
# Log hardware detection at module load
|
| 67 |
logger.info(f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}")
|
src/mosaic/data_directory.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data directory management for model files.
|
| 2 |
+
|
| 3 |
+
This module provides a centralized function to locate the directory containing
|
| 4 |
+
model data files, checking environment variables and falling back to local paths.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Cached model data directory. Stays None until get_data_directory() resolves
# it for the first time or set_data_directory() assigns it explicitly.
_MODEL_DATA_DIR = None


def get_data_directory() -> Path:
    """Return the directory that holds the model data files.

    Resolution order:
        1. The value cached by an earlier call or by set_data_directory().
        2. The MOSAIC_DATA_DIR environment variable, when set and non-empty.
        3. The relative path ``data`` (resolved against the current working
           directory by whoever opens files under it).

    The resolved path is cached in the module, so later changes to the
    environment variable are ignored; call set_data_directory() to override.
    This function never checks that the directory exists.

    Returns:
        Path: Path to the model data directory.
    """
    global _MODEL_DATA_DIR
    if _MODEL_DATA_DIR is not None:
        return _MODEL_DATA_DIR

    env_dir = os.environ.get("MOSAIC_DATA_DIR")
    # An empty variable would turn into Path("") == Path("."), silently pointing
    # model lookups at the working directory, so treat it the same as unset.
    # Both the "local data/ has paladin_model_map.csv" case and the final
    # fallback resolve to the same relative path, so no existence probe is made.
    _MODEL_DATA_DIR = Path(env_dir) if env_dir else Path("data")
    return _MODEL_DATA_DIR


def set_data_directory(path) -> None:
    """Set the model data directory returned by get_data_directory().

    The path is also exported as the MOSAIC_DATA_DIR environment variable, so
    code that reads the variable directly sees the same location.

    Args:
        path: Path to the data directory (a string or path-like object).
    """
    global _MODEL_DATA_DIR
    _MODEL_DATA_DIR = Path(path)
    os.environ["MOSAIC_DATA_DIR"] = str(_MODEL_DATA_DIR)
|
src/mosaic/gradio_app.py
CHANGED
|
@@ -12,8 +12,8 @@ import pandas as pd
|
|
| 12 |
from pathlib import Path
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from loguru import logger
|
| 15 |
-
import os
|
| 16 |
|
|
|
|
| 17 |
from mosaic.ui import launch_gradio
|
| 18 |
from mosaic.ui.app import set_cancer_subtype_maps
|
| 19 |
from mosaic.ui.utils import (
|
|
@@ -26,38 +26,6 @@ from mosaic.ui.utils import (
|
|
| 26 |
)
|
| 27 |
from mosaic.analysis import analyze_slide
|
| 28 |
|
| 29 |
-
# Global variable to store the model data directory path
|
| 30 |
-
_MODEL_DATA_DIR = None
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
def get_data_directory():
|
| 34 |
-
"""Get the directory containing model data files.
|
| 35 |
-
|
| 36 |
-
Returns the HuggingFace cache directory path for the model repository,
|
| 37 |
-
or falls back to local 'data/' directory if not yet downloaded.
|
| 38 |
-
|
| 39 |
-
Returns:
|
| 40 |
-
Path: Path to the model data directory
|
| 41 |
-
"""
|
| 42 |
-
global _MODEL_DATA_DIR
|
| 43 |
-
if _MODEL_DATA_DIR is not None:
|
| 44 |
-
return _MODEL_DATA_DIR
|
| 45 |
-
|
| 46 |
-
# Check if environment variable is set
|
| 47 |
-
if "MOSAIC_DATA_DIR" in os.environ:
|
| 48 |
-
_MODEL_DATA_DIR = Path(os.environ["MOSAIC_DATA_DIR"])
|
| 49 |
-
return _MODEL_DATA_DIR
|
| 50 |
-
|
| 51 |
-
# Check if local data/ directory exists (for development/backward compat)
|
| 52 |
-
local_data = Path("data")
|
| 53 |
-
if local_data.exists() and (local_data / "paladin_model_map.csv").exists():
|
| 54 |
-
_MODEL_DATA_DIR = local_data
|
| 55 |
-
return _MODEL_DATA_DIR
|
| 56 |
-
|
| 57 |
-
# Fall back to repo root data/ directory
|
| 58 |
-
_MODEL_DATA_DIR = local_data
|
| 59 |
-
return _MODEL_DATA_DIR
|
| 60 |
-
|
| 61 |
|
| 62 |
def download_and_process_models():
|
| 63 |
"""Download models from HuggingFace and initialize cancer subtype mappings.
|
|
@@ -72,8 +40,6 @@ def download_and_process_models():
|
|
| 72 |
- reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
|
| 73 |
- cancer_subtypes: List of all supported cancer subtype codes
|
| 74 |
"""
|
| 75 |
-
global _MODEL_DATA_DIR
|
| 76 |
-
|
| 77 |
# Download to HF cache directory (not local_dir)
|
| 78 |
# This returns the path to the cached snapshot
|
| 79 |
logger.info("Downloading models from HuggingFace Hub to cache directory...")
|
|
@@ -81,14 +47,13 @@ def download_and_process_models():
|
|
| 81 |
repo_id="PDM-Group/paladin-aeon-models",
|
| 82 |
# No local_dir - use HF cache
|
| 83 |
)
|
| 84 |
-
|
| 85 |
-
logger.info(f"Models downloaded to: {_MODEL_DATA_DIR}")
|
| 86 |
|
| 87 |
-
#
|
| 88 |
-
|
| 89 |
|
| 90 |
model_map = pd.read_csv(
|
| 91 |
-
|
| 92 |
)
|
| 93 |
cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
|
| 94 |
cancer_subtype_name_map = {"Unknown": "UNK"}
|
|
|
|
| 12 |
from pathlib import Path
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from loguru import logger
|
|
|
|
| 15 |
|
| 16 |
+
from mosaic.data_directory import set_data_directory
|
| 17 |
from mosaic.ui import launch_gradio
|
| 18 |
from mosaic.ui.app import set_cancer_subtype_maps
|
| 19 |
from mosaic.ui.utils import (
|
|
|
|
| 26 |
)
|
| 27 |
from mosaic.analysis import analyze_slide
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def download_and_process_models():
|
| 31 |
"""Download models from HuggingFace and initialize cancer subtype mappings.
|
|
|
|
| 40 |
- reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
|
| 41 |
- cancer_subtypes: List of all supported cancer subtype codes
|
| 42 |
"""
|
|
|
|
|
|
|
| 43 |
# Download to HF cache directory (not local_dir)
|
| 44 |
# This returns the path to the cached snapshot
|
| 45 |
logger.info("Downloading models from HuggingFace Hub to cache directory...")
|
|
|
|
| 47 |
repo_id="PDM-Group/paladin-aeon-models",
|
| 48 |
# No local_dir - use HF cache
|
| 49 |
)
|
| 50 |
+
logger.info(f"Models downloaded to: {cache_dir}")
|
|
|
|
| 51 |
|
| 52 |
+
# Set the data directory for other modules to use
|
| 53 |
+
set_data_directory(cache_dir)
|
| 54 |
|
| 55 |
model_map = pd.read_csv(
|
| 56 |
+
Path(cache_dir) / "paladin_model_map.csv",
|
| 57 |
)
|
| 58 |
cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
|
| 59 |
cancer_subtype_name_map = {"Unknown": "UNK"}
|
src/mosaic/inference/aeon.py
CHANGED
|
@@ -22,7 +22,7 @@ from mosaic.inference.data import (
|
|
| 22 |
)
|
| 23 |
|
| 24 |
from loguru import logger
|
| 25 |
-
from mosaic.gradio_app import get_data_directory
|
| 26 |
|
| 27 |
# Cancer types excluded from prediction (too broad or ambiguous)
|
| 28 |
# These are used to mask out predictions for overly general cancer types
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
from loguru import logger
|
| 25 |
+
from mosaic.data_directory import get_data_directory
|
| 26 |
|
| 27 |
# Cancer types excluded from prediction (too broad or ambiguous)
|
| 28 |
# These are used to mask out predictions for overly general cancer types
|
src/mosaic/inference/data.py
CHANGED
|
@@ -13,7 +13,7 @@ import torch
|
|
| 13 |
from torch.utils.data import Dataset
|
| 14 |
import numpy as np
|
| 15 |
|
| 16 |
-
from mosaic.gradio_app import get_data_directory
|
| 17 |
|
| 18 |
CANCER_TYPE_TO_INT_MAP = {
|
| 19 |
"AASTR": 0,
|
|
|
|
| 13 |
from torch.utils.data import Dataset
|
| 14 |
import numpy as np
|
| 15 |
|
| 16 |
+
from mosaic.data_directory import get_data_directory
|
| 17 |
|
| 18 |
CANCER_TYPE_TO_INT_MAP = {
|
| 19 |
"AASTR": 0,
|
src/mosaic/ui/utils.py
CHANGED
|
@@ -13,7 +13,7 @@ import pandas as pd
|
|
| 13 |
import gradio as gr
|
| 14 |
import requests
|
| 15 |
|
| 16 |
-
from mosaic.gradio_app import get_data_directory
|
| 17 |
|
| 18 |
# This path should be outside your project directory if running locally
|
| 19 |
TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"
|
|
|
|
| 13 |
import gradio as gr
|
| 14 |
import requests
|
| 15 |
|
| 16 |
+
from mosaic.data_directory import get_data_directory
|
| 17 |
|
| 18 |
# This path should be outside your project directory if running locally
|
| 19 |
TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"
|