Improve Docker configuration for HuggingFace cache and permissions
Browse files
- Add HF_HOME and TRANSFORMERS_CACHE env vars with mounted cache directory
- Increase shared memory from 500m to 3G
- Add HF_TOKEN validation to all Docker targets
- Create output directory before Docker runs to ensure proper ownership
- Add ARGS variable support for passing extra arguments to mosaic
- Update volume mount paths to use /mnt/slides, /mnt/data, /mnt/output
- Add docker-run-cli target for running arbitrary mosaic CLI commands
- Fix mosaic script to use mskmind/mosaic image and consistent formatting
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Makefile
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
.PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-run docker-push docker-clean run-ui run-cli
|
| 2 |
|
| 3 |
# Default target
|
| 4 |
.DEFAULT_GOAL := help
|
|
@@ -75,14 +75,19 @@ run-ui: ## Launch Gradio web interface
|
|
| 75 |
run-ui-public: ## Launch Gradio web interface with public sharing
|
| 76 |
$(PYTHON) -m mosaic.gradio_app --share
|
| 77 |
|
| 78 |
-
run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir)
|
| 79 |
-
$(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT)
|
| 80 |
|
| 81 |
-
run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir)
|
| 82 |
-
$(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT)
|
| 83 |
|
| 84 |
##@ Docker
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
docker-build: ## Build Docker image with SSH forwarding
|
| 87 |
@echo "Building Docker image with SSH authentication..."
|
| 88 |
@./build.sh
|
|
@@ -96,38 +101,65 @@ docker-build-no-cache: ## Build Docker image without cache
|
|
| 96 |
eval "$$(ssh-agent -k)"
|
| 97 |
|
| 98 |
docker-run: ## Run Docker container (web UI mode)
|
|
|
|
| 99 |
docker run -it --rm \
|
| 100 |
-
|
|
|
|
| 101 |
-p 7860:7860 \
|
| 102 |
-
-v $(PWD)/data:/
|
| 103 |
-
-v $(PWD)/output:/
|
| 104 |
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
| 105 |
|
| 106 |
-
docker-run-
|
|
|
|
| 107 |
docker run -it --rm \
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
-v $(PWD)/
|
|
|
|
|
|
|
| 111 |
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
|
| 112 |
-
|
| 113 |
-
--output-dir /app/output
|
| 114 |
|
| 115 |
-
docker-run-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
docker run -it --rm \
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
-v $(
|
|
|
|
|
|
|
| 120 |
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
|
| 121 |
-
--slide-csv /
|
| 122 |
-
--output-dir /
|
|
|
|
| 123 |
|
| 124 |
docker-shell: ## Open shell in Docker container
|
|
|
|
| 125 |
docker run -it --rm \
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
-v $(PWD)/
|
| 129 |
-
$(
|
| 130 |
-
/bin/bash
|
|
|
|
| 131 |
|
| 132 |
docker-tag: ## Tag Docker image for registry
|
| 133 |
docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
|
|
|
| 1 |
+
# Every command-style target must be declared phony; otherwise a file or
# directory with the same name (e.g. ./run-single) would make the target look
# up to date and silently skip its recipe.
.PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-run docker-run-cli docker-push docker-clean run-ui run-cli
.PHONY: run-ui-public run-single run-batch docker-build-no-cache docker-run-single docker-run-batch docker-shell docker-tag
|
| 2 |
|
| 3 |
# Default target
|
| 4 |
.DEFAULT_GOAL := help
|
|
|
|
| 75 |
run-ui-public: ## Launch Gradio web interface with public sharing
|
| 76 |
$(PYTHON) -m mosaic.gradio_app --share
|
| 77 |
|
| 78 |
+
# Analyse a single slide with the local Python environment.
#   SLIDE  (required) path to the slide file
#   OUTPUT (required) directory passed as --output-dir
#   ARGS   (optional) extra arguments appended verbatim
# The required variables are checked up front, mirroring the docker-run-*
# targets, so a missing value gives a clear error instead of a malformed
# command line.
run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir [ARGS="--extra-args"])
	@test -n "$(SLIDE)" || (echo "Error: SLIDE variable is required" && exit 1)
	@test -n "$(OUTPUT)" || (echo "Error: OUTPUT variable is required" && exit 1)
	$(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT) $(ARGS)
|
| 80 |
|
| 81 |
+
# Analyse the slides listed in a CSV with the local Python environment.
#   CSV    (required) path to the settings CSV
#   OUTPUT (required) directory passed as --output-dir
#   ARGS   (optional) extra arguments appended verbatim
# The required variables are checked up front, mirroring the docker-run-*
# targets, so a missing value gives a clear error instead of a malformed
# command line.
run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir [ARGS="--extra-args"])
	@test -n "$(CSV)" || (echo "Error: CSV variable is required" && exit 1)
	@test -n "$(OUTPUT)" || (echo "Error: OUTPUT variable is required" && exit 1)
	$(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT) $(ARGS)
|
| 83 |
|
| 84 |
##@ Docker
|
| 85 |
|
| 86 |
+
# Docker run options matching the mosaic entrypoint script

# GPU flags added to every `docker run` issued from this Makefile.
DOCKER_GPU_ARGS := --gpus=all --runtime=nvidia

# Host directory that is bind-mounted at /mnt/hf_cache inside the container.
HF_CACHE_DIR := $(HOME)/.cache/huggingface

# Make HF_TOKEN part of each recipe's environment so that `--env HF_TOKEN`
# (name only, no value) can forward it. Passing the name instead of
# HF_TOKEN="<value>" keeps the secret out of the command line that make echoes
# before running `docker run`.
export HF_TOKEN

# Flags shared by all docker-run* targets: shared-memory size, HuggingFace
# token and cache location, the invoking user's uid:gid (so files written to
# bind mounts are owned by that user), and the cache mount itself.
DOCKER_COMMON_ARGS := --shm-size=3G --env HF_TOKEN --env HF_HOME=/mnt/hf_cache --env TRANSFORMERS_CACHE=/mnt/hf_cache --user $(shell id -u):$(shell id -g) -v $(HF_CACHE_DIR):/mnt/hf_cache
|
| 90 |
+
|
| 91 |
docker-build: ## Build Docker image with SSH forwarding
|
| 92 |
@echo "Building Docker image with SSH authentication..."
|
| 93 |
@./build.sh
|
|
|
|
| 101 |
eval "$$(ssh-agent -k)"
|
| 102 |
|
| 103 |
# Start the image with its default entrypoint and publish port 7860.
# Requires HF_TOKEN. ./data is mounted read-only at /mnt/data and ./output
# read-write at /mnt/output.
# The writable bind-mount sources are created first: the container runs as the
# invoking user (see DOCKER_COMMON_ARGS), and a directory that Docker has to
# create on the fly would not be owned by that user.
docker-run: ## Run Docker container (web UI mode)
	@test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
	@mkdir -p "$(HF_CACHE_DIR)" "$(PWD)/output"
	docker run -it --rm \
		$(DOCKER_GPU_ARGS) \
		$(DOCKER_COMMON_ARGS) \
		-p 7860:7860 \
		-v $(PWD)/data:/mnt/data:ro \
		-v $(PWD)/output:/mnt/output \
		$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
| 112 |
|
| 113 |
+
# Run an arbitrary mosaic CLI command inside the container.
#   ARGS (optional) arguments handed to the `mosaic` entrypoint verbatim
# Requires HF_TOKEN. ./data is mounted read-only at /mnt/data and ./output
# read-write at /mnt/output.
# The writable bind-mount sources are created first: the container runs as the
# invoking user (see DOCKER_COMMON_ARGS), and a directory that Docker has to
# create on the fly would not be owned by that user.
docker-run-cli: ## Run Docker container with mosaic CLI (usage: make docker-run-cli ARGS="--help")
	@test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
	@mkdir -p "$(HF_CACHE_DIR)" "$(PWD)/output"
	docker run -it --rm \
		$(DOCKER_GPU_ARGS) \
		$(DOCKER_COMMON_ARGS) \
		-v $(PWD)/data:/mnt/data:ro \
		-v $(PWD)/output:/mnt/output \
		--entrypoint mosaic \
		$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
		$(ARGS)
|
|
|
|
| 123 |
|
| 124 |
+
# Analyse one slide inside the container.
#   SLIDE (required) host path to the slide; its directory is mounted
#         read-only at /mnt/slides and the file is addressed by base name
#   ARGS  (optional) extra arguments appended verbatim
# Requires HF_TOKEN. Results are written to ./output (mounted at /mnt/output).
# SLIDE must exist: otherwise Docker would be asked to mount, and possibly
# create, a directory that was never meant to exist.
# The writable bind-mount sources are created first so they are owned by the
# invoking user, which is also the user the container runs as.
docker-run-single: ## Run Docker container (single slide mode, usage: make docker-run-single SLIDE=path/to/slide.svs [ARGS="--extra-args"])
	@test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
	@test -n "$(SLIDE)" || (echo "Error: SLIDE variable is required" && exit 1)
	@test -e "$(SLIDE)" || (echo "Error: SLIDE does not exist: $(SLIDE)" && exit 1)
	@mkdir -p "$(HF_CACHE_DIR)" "$(PWD)/output"
	docker run -it --rm \
		$(DOCKER_GPU_ARGS) \
		$(DOCKER_COMMON_ARGS) \
		-v $(dir $(abspath $(SLIDE))):/mnt/slides:ro \
		-v $(PWD)/output:/mnt/output \
		--entrypoint mosaic \
		$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
		--slide-path /mnt/slides/$(notdir $(SLIDE)) \
		--output-dir /mnt/output \
		$(ARGS)
|
| 138 |
+
|
| 139 |
+
# Analyse the slides listed in a CSV inside the container.
#   CSV  (required) host path to the CSV; its directory is mounted read-only
#        at /mnt/data and the file is addressed by base name
#   ARGS (optional) extra arguments appended verbatim
# Requires HF_TOKEN. Results are written to ./output (mounted at /mnt/output).
# CSV must exist: otherwise Docker would be asked to mount, and possibly
# create, a directory that was never meant to exist.
# The writable bind-mount sources are created first so they are owned by the
# invoking user, which is also the user the container runs as.
docker-run-batch: ## Run Docker container (batch mode, usage: make docker-run-batch CSV=path/to/slides.csv [ARGS="--extra-args"])
	@test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
	@test -n "$(CSV)" || (echo "Error: CSV variable is required" && exit 1)
	@test -e "$(CSV)" || (echo "Error: CSV does not exist: $(CSV)" && exit 1)
	@mkdir -p "$(HF_CACHE_DIR)" "$(PWD)/output"
	docker run -it --rm \
		$(DOCKER_GPU_ARGS) \
		$(DOCKER_COMMON_ARGS) \
		-v $(dir $(abspath $(CSV))):/mnt/data:ro \
		-v $(PWD)/output:/mnt/output \
		--entrypoint mosaic \
		$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
		--slide-csv /mnt/data/$(notdir $(CSV)) \
		--output-dir /mnt/output \
		$(ARGS)
|
| 153 |
|
| 154 |
# Open an interactive /bin/bash in the image with the same mounts and
# environment as docker-run-cli. Requires HF_TOKEN.
# The writable bind-mount sources are created first: the container runs as the
# invoking user (see DOCKER_COMMON_ARGS), and a directory that Docker has to
# create on the fly would not be owned by that user.
docker-shell: ## Open shell in Docker container
	@test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
	@mkdir -p "$(HF_CACHE_DIR)" "$(PWD)/output"
	docker run -it --rm \
		$(DOCKER_GPU_ARGS) \
		$(DOCKER_COMMON_ARGS) \
		-v $(PWD)/data:/mnt/data:ro \
		-v $(PWD)/output:/mnt/output \
		--entrypoint /bin/bash \
		$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
| 163 |
|
| 164 |
docker-tag: ## Tag Docker image for registry
|
| 165 |
docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
mosaic
CHANGED
|
@@ -20,10 +20,14 @@
|
|
| 20 |
# in the same directory tree. Slides can be in subdirectories relative to
|
| 21 |
# the CSV file location.
|
| 22 |
|
| 23 |
-
DOCKER_IMAGE="
|
| 24 |
USE_GPU=true
|
|
|
|
| 25 |
|
| 26 |
-
die
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Check if Docker is available
|
| 29 |
command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH"
|
|
@@ -31,13 +35,16 @@ command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH
|
|
| 31 |
# Check if HF_TOKEN is set
|
| 32 |
[[ -n $HF_TOKEN ]] || die "HF_TOKEN environment variable is not set. Please set it to your HuggingFace access token."
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
# Check if nvidia runtime is available
|
| 35 |
if docker info 2>/dev/null | grep -q nvidia; then
|
| 36 |
-
|
| 37 |
else
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
fi
|
| 42 |
|
| 43 |
# Parse arguments to identify paths that need to be mounted
|
|
@@ -45,73 +52,75 @@ VOLUME_MOUNTS=()
|
|
| 45 |
ARGS=()
|
| 46 |
|
| 47 |
while [[ $# -gt 0 ]]; do
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
done
|
| 102 |
|
| 103 |
# Remove duplicate volume mounts
|
| 104 |
UNIQUE_MOUNTS=$(printf '%s\n' "${VOLUME_MOUNTS[@]}" | awk '!seen[$0]++')
|
| 105 |
VOLUME_ARGS=()
|
| 106 |
while IFS= read -r mount; do
|
| 107 |
-
|
| 108 |
-
done <<<
|
| 109 |
|
| 110 |
# Run the Docker container with the mosaic CLI
|
| 111 |
# Override the entrypoint to run mosaic command instead of gradio_app
|
| 112 |
GPU_ARGS=()
|
| 113 |
if [ "$USE_GPU" = true ]; then
|
| 114 |
-
|
| 115 |
fi
|
| 116 |
|
| 117 |
# Get current user ID and group ID to ensure output files have correct ownership
|
|
@@ -119,11 +128,14 @@ USER_ID=$(id -u)
|
|
| 119 |
GROUP_ID=$(id -g)
|
| 120 |
|
| 121 |
docker run --rm \
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# in the same directory tree. Slides can be in subdirectories relative to
|
| 21 |
# the CSV file location.
|
| 22 |
|
| 23 |
+
DOCKER_IMAGE="mskmind/mosaic"
|
| 24 |
USE_GPU=true
|
| 25 |
+
HF_CACHE_DIR="${HOME}/.cache/huggingface"
|
| 26 |
|
| 27 |
+
# Abort the script: write "FATAL: " followed by all arguments (joined by
# spaces) to stderr, then exit with status 1.
die() {
    printf 'FATAL: %s\n' "$*" >&2
    exit 1
}
|
| 31 |
|
| 32 |
# Check if Docker is available
|
| 33 |
command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH"
|
|
|
|
| 35 |
# Check if HF_TOKEN is set
|
| 36 |
[[ -n $HF_TOKEN ]] || die "HF_TOKEN environment variable is not set. Please set it to your HuggingFace access token."
|
| 37 |
|
| 38 |
+
# Create HuggingFace cache directory if it doesn't exist
|
| 39 |
+
mkdir -p "$HF_CACHE_DIR"
|
| 40 |
+
|
| 41 |
# Decide whether to request GPUs: USE_GPU becomes true only when the output of
# `docker info` mentions "nvidia"; otherwise warn on stderr and fall back to a
# run without GPU flags.
if ! docker info 2>/dev/null | grep -q nvidia; then
    echo "Warning: NVIDIA Docker runtime not detected. Running without GPU support." >&2
    echo " To enable GPU support, install the NVIDIA Container Toolkit." >&2
    USE_GPU=false
else
    USE_GPU=true
fi
|
| 49 |
|
| 50 |
# Parse arguments to identify paths that need to be mounted
|
|
|
|
| 52 |
ARGS=()
|
| 53 |
|
| 54 |
# Translate host paths on the command line into container paths.
#
# For --slide-path, --slide-csv and --output-dir the host location is resolved
# to an absolute path, a bind mount for it is appended to VOLUME_MOUNTS (as a
# "-v" / "host:container" pair) and the argument is rewritten in ARGS to the
# path as seen inside the container. Both "--flag value" and "--flag=value"
# are accepted. Every other argument is copied to ARGS unchanged.
#
# Unlike a silent skip, a path flag without a value, or one pointing at a
# slide/CSV that does not exist, stops the script via die: mosaic would
# otherwise start without the input the user asked for.
#
# Globals written: ARGS, VOLUME_MOUNTS, SLIDE_PATH/SLIDE_DIR/SLIDE_FILE,
# CSV_PATH/CSV_DIR/CSV_FILE, OUTPUT_DIR.
parse_mosaic_args() {
    while [[ $# -gt 0 ]]; do
        ARG="$1"
        case "$ARG" in
            --slide-path=*|--slide-csv=*|--output-dir=*)
                # Split "--flag=value" into two words and handle it on the
                # next iteration through the branches below.
                set -- "${ARG%%=*}" "${ARG#*=}" "${@:2}"
                ;;
            --slide-path)
                [[ $# -ge 2 && -n "$2" ]] || die "--slide-path requires a value"
                [[ -e "$2" ]] || die "Slide not found: $2"
                # Convert to absolute path
                SLIDE_PATH=$(readlink -f "$2")
                SLIDE_DIR=$(dirname "$SLIDE_PATH")
                SLIDE_FILE=$(basename "$SLIDE_PATH")
                # Mount the directory containing the slide
                VOLUME_MOUNTS+=("-v" "$SLIDE_DIR:/mnt/slides:ro")
                # Pass the container path to mosaic
                ARGS+=("--slide-path" "/mnt/slides/$SLIDE_FILE")
                shift 2
                ;;
            --slide-csv)
                [[ $# -ge 2 && -n "$2" ]] || die "--slide-csv requires a value"
                [[ -e "$2" ]] || die "CSV file not found: $2"
                # Convert to absolute path
                CSV_PATH=$(readlink -f "$2")
                CSV_DIR=$(dirname "$CSV_PATH")
                CSV_FILE=$(basename "$CSV_PATH")
                # Mount the directory of the CSV so slides referenced with
                # paths relative to it (including subdirectories) are visible
                VOLUME_MOUNTS+=("-v" "$CSV_DIR:/mnt/data:ro")
                # Pass the container path to mosaic
                ARGS+=("--slide-csv" "/mnt/data/$CSV_FILE")
                shift 2
                ;;
            --output-dir)
                [[ $# -ge 2 && -n "$2" ]] || die "--output-dir requires a value"
                OUTPUT_DIR="$2"
                # Create it ourselves so it is owned by the invoking user
                mkdir -p "$OUTPUT_DIR" || die "Cannot create output directory: $OUTPUT_DIR"
                OUTPUT_DIR=$(readlink -f "$OUTPUT_DIR")
                # Best effort: make sure the directory is writable by its owner
                chmod u+rwx "$OUTPUT_DIR" 2>/dev/null || true
                # Mount the output directory
                VOLUME_MOUNTS+=("-v" "$OUTPUT_DIR:/mnt/output")
                # Pass the container path to mosaic
                ARGS+=("--output-dir" "/mnt/output")
                shift 2
                ;;
            *)
                # Pass through all other arguments
                ARGS+=("$ARG")
                shift
                ;;
        esac
    done
}
parse_mosaic_args "$@"
|
| 111 |
|
| 112 |
# Remove duplicate volume mounts.
#
# VOLUME_MOUNTS holds alternating "-v" and "host:container" entries. The mount
# specs are de-duplicated (first occurrence wins, order preserved) and every
# kept spec is re-paired with its own "-v" in VOLUME_ARGS.
# De-duplicating the raw entries line by line would also collapse the repeated
# "-v" tokens, leaving later mount specs without a flag so that docker
# interprets one of them as the image name.
build_volume_args() {
    local mount existing duplicate
    VOLUME_ARGS=()
    for mount in "${VOLUME_MOUNTS[@]}"; do
        # Skip the flag tokens and empty entries; only specs are compared.
        if [[ -z "$mount" || "$mount" == "-v" ]]; then
            continue
        fi
        duplicate=false
        for existing in "${VOLUME_ARGS[@]}"; do
            if [[ "$existing" == "$mount" ]]; then
                duplicate=true
                break
            fi
        done
        if [[ "$duplicate" == false ]]; then
            VOLUME_ARGS+=("-v" "$mount")
        fi
    done
}
build_volume_args
|
| 118 |
|
| 119 |
# Run the Docker container with the mosaic CLI
|
| 120 |
# Override the entrypoint to run mosaic command instead of gradio_app
|
| 121 |
# GPU flags for `docker run`; the array stays empty unless USE_GPU is exactly
# the string "true".
GPU_ARGS=()
case "$USE_GPU" in
    true)
        GPU_ARGS+=("--gpus=all" "--runtime=nvidia")
        ;;
esac
|
| 125 |
|
| 126 |
# Get current user ID and group ID to ensure output files have correct ownership
|
|
|
|
| 128 |
GROUP_ID=$(id -g)
|
| 129 |
|
| 130 |
# Run the mosaic CLI in the container as the invoking user, with the
# HuggingFace cache mounted at /mnt/hf_cache and the mounts/arguments
# collected above.
#
# HF_TOKEN is forwarded by name (`--env HF_TOKEN`) rather than by value, so
# the secret does not appear in the docker command line, where other local
# users could read it from the process list. The export makes sure the
# variable is in docker's environment for that pass-through to work.
export HF_TOKEN
docker run --rm \
    "${GPU_ARGS[@]}" \
    --user "${USER_ID}:${GROUP_ID}" \
    --env HF_TOKEN \
    --env HF_HOME=/mnt/hf_cache \
    --env TRANSFORMERS_CACHE=/mnt/hf_cache \
    --shm-size=3G \
    -v "${HF_CACHE_DIR}:/mnt/hf_cache" \
    "${VOLUME_ARGS[@]}" \
    --entrypoint mosaic \
    "$DOCKER_IMAGE" \
    "${ARGS[@]}"
|