raylim Claude Opus 4.5 committed on
Commit
3e64df2
·
unverified ·
1 Parent(s): b661ed7

Improve Docker configuration for HuggingFace cache and permissions

Browse files

- Add HF_HOME and TRANSFORMERS_CACHE env vars with mounted cache directory
- Increase shared memory from 500m to 3G
- Add HF_TOKEN validation to all Docker targets
- Create output directory before Docker runs to ensure proper ownership
- Add ARGS variable support for passing extra arguments to mosaic
- Update volume mount paths to use /mnt/slides, /mnt/data, /mnt/output
- Add docker-run-cli target for running arbitrary mosaic CLI commands
- Fix mosaic script to use mskmind/mosaic image and consistent formatting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. Makefile +57 -25
  2. mosaic +82 -70
Makefile CHANGED
@@ -1,4 +1,4 @@
1
- .PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-run docker-push docker-clean run-ui run-cli
2
 
3
  # Default target
4
  .DEFAULT_GOAL := help
@@ -75,14 +75,19 @@ run-ui: ## Launch Gradio web interface
75
  run-ui-public: ## Launch Gradio web interface with public sharing
76
  $(PYTHON) -m mosaic.gradio_app --share
77
 
78
- run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir)
79
- $(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT)
80
 
81
- run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir)
82
- $(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT)
83
 
84
  ##@ Docker
85
 
 
 
 
 
 
86
  docker-build: ## Build Docker image with SSH forwarding
87
  @echo "Building Docker image with SSH authentication..."
88
  @./build.sh
@@ -96,38 +101,65 @@ docker-build-no-cache: ## Build Docker image without cache
96
  eval "$$(ssh-agent -k)"
97
 
98
  docker-run: ## Run Docker container (web UI mode)
 
99
  docker run -it --rm \
100
- --gpus all \
 
101
  -p 7860:7860 \
102
- -v $(PWD)/data:/app/data \
103
- -v $(PWD)/output:/app/output \
104
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
105
 
106
- docker-run-single: ## Run Docker container (single slide mode)
 
107
  docker run -it --rm \
108
- --gpus all \
109
- -v $(PWD)/data:/app/data \
110
- -v $(PWD)/output:/app/output \
 
 
111
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
112
- --slide-path /app/data/$(SLIDE) \
113
- --output-dir /app/output
114
 
115
- docker-run-batch: ## Run Docker container (batch mode)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  docker run -it --rm \
117
- --gpus all \
118
- -v $(PWD)/data:/app/data \
119
- -v $(PWD)/output:/app/output \
 
 
120
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
121
- --slide-csv /app/data/$(CSV) \
122
- --output-dir /app/output
 
123
 
124
  docker-shell: ## Open shell in Docker container
 
125
  docker run -it --rm \
126
- --gpus all \
127
- -v $(PWD)/data:/app/data \
128
- -v $(PWD)/output:/app/output \
129
- $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
130
- /bin/bash
 
131
 
132
  docker-tag: ## Tag Docker image for registry
133
  docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
 
1
+ .PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-build-no-cache docker-run docker-run-cli docker-run-single docker-run-batch docker-shell docker-tag docker-push docker-clean run-ui run-ui-public run-cli run-single run-batch
2
 
3
  # Default target
4
  .DEFAULT_GOAL := help
 
75
  run-ui-public: ## Launch Gradio web interface with public sharing
76
  $(PYTHON) -m mosaic.gradio_app --share
77
 
78
+ run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir [ARGS="--extra-args"])
79
+ $(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT) $(ARGS)
80
 
81
+ run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir [ARGS="--extra-args"])
82
+ $(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT) $(ARGS)
83
 
84
  ##@ Docker
85
 
86
+ # Docker run options matching the mosaic entrypoint script
87
+ DOCKER_GPU_ARGS := --gpus=all --runtime=nvidia
88
+ HF_CACHE_DIR := $(HOME)/.cache/huggingface
89
+ DOCKER_COMMON_ARGS := --shm-size=3G --env HF_TOKEN="$(HF_TOKEN)" --env HF_HOME=/mnt/hf_cache --env TRANSFORMERS_CACHE=/mnt/hf_cache --user $(shell id -u):$(shell id -g) -v $(HF_CACHE_DIR):/mnt/hf_cache
90
+
91
  docker-build: ## Build Docker image with SSH forwarding
92
  @echo "Building Docker image with SSH authentication..."
93
  @./build.sh
 
101
  eval "$$(ssh-agent -k)"
102
 
103
  docker-run: ## Run Docker container (web UI mode)
104
+ @test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
105
  docker run -it --rm \
106
+ $(DOCKER_GPU_ARGS) \
107
+ $(DOCKER_COMMON_ARGS) \
108
  -p 7860:7860 \
109
+ -v $(PWD)/data:/mnt/data:ro \
110
+ -v $(PWD)/output:/mnt/output \
111
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
112
 
113
+ docker-run-cli: ## Run Docker container with mosaic CLI (usage: make docker-run-cli ARGS="--help")
114
+ @test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
115
  docker run -it --rm \
116
+ $(DOCKER_GPU_ARGS) \
117
+ $(DOCKER_COMMON_ARGS) \
118
+ -v $(PWD)/data:/mnt/data:ro \
119
+ -v $(PWD)/output:/mnt/output \
120
+ --entrypoint mosaic \
121
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
122
+ $(ARGS)
 
123
 
124
+ docker-run-single: ## Run Docker container (single slide mode, usage: make docker-run-single SLIDE=path/to/slide.svs [ARGS="--extra-args"])
125
+ @test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
126
+ @test -n "$(SLIDE)" || (echo "Error: SLIDE variable is required" && exit 1)
127
+ @mkdir -p $(PWD)/output
128
+ docker run -it --rm \
129
+ $(DOCKER_GPU_ARGS) \
130
+ $(DOCKER_COMMON_ARGS) \
131
+ -v $(dir $(abspath $(SLIDE))):/mnt/slides:ro \
132
+ -v $(PWD)/output:/mnt/output \
133
+ --entrypoint mosaic \
134
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
135
+ --slide-path /mnt/slides/$(notdir $(SLIDE)) \
136
+ --output-dir /mnt/output \
137
+ $(ARGS)
138
+
139
+ docker-run-batch: ## Run Docker container (batch mode, usage: make docker-run-batch CSV=path/to/slides.csv [ARGS="--extra-args"])
140
+ @test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
141
+ @test -n "$(CSV)" || (echo "Error: CSV variable is required" && exit 1)
142
+ @mkdir -p $(PWD)/output
143
  docker run -it --rm \
144
+ $(DOCKER_GPU_ARGS) \
145
+ $(DOCKER_COMMON_ARGS) \
146
+ -v $(dir $(abspath $(CSV))):/mnt/data:ro \
147
+ -v $(PWD)/output:/mnt/output \
148
+ --entrypoint mosaic \
149
  $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
150
+ --slide-csv /mnt/data/$(notdir $(CSV)) \
151
+ --output-dir /mnt/output \
152
+ $(ARGS)
153
 
154
  docker-shell: ## Open shell in Docker container
155
+ @test -n "$(HF_TOKEN)" || (echo "Error: HF_TOKEN environment variable is not set" && exit 1)
156
  docker run -it --rm \
157
+ $(DOCKER_GPU_ARGS) \
158
+ $(DOCKER_COMMON_ARGS) \
159
+ -v $(PWD)/data:/mnt/data:ro \
160
+ -v $(PWD)/output:/mnt/output \
161
+ --entrypoint /bin/bash \
162
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
163
 
164
  docker-tag: ## Tag Docker image for registry
165
  docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
mosaic CHANGED
@@ -20,10 +20,14 @@
20
  # in the same directory tree. Slides can be in subdirectories relative to
21
  # the CSV file location.
22
 
23
- DOCKER_IMAGE="tomp/mosaic-gradio"
24
  USE_GPU=true
 
25
 
26
- die () { echo "FATAL: $*" >&2; exit 1; }
 
 
 
27
 
28
  # Check if Docker is available
29
  command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH"
@@ -31,13 +35,16 @@ command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH
31
  # Check if HF_TOKEN is set
32
  [[ -n $HF_TOKEN ]] || die "HF_TOKEN environment variable is not set. Please set it to your HuggingFace access token."
33
 
 
 
 
34
  # Check if nvidia runtime is available
35
  if docker info 2>/dev/null | grep -q nvidia; then
36
- USE_GPU=true
37
  else
38
- echo "Warning: NVIDIA Docker runtime not detected. Running without GPU support." >&2
39
- echo " To enable GPU support, install the NVIDIA Container Toolkit." >&2
40
- USE_GPU=false
41
  fi
42
 
43
  # Parse arguments to identify paths that need to be mounted
@@ -45,73 +52,75 @@ VOLUME_MOUNTS=()
45
  ARGS=()
46
 
47
  while [[ $# -gt 0 ]]; do
48
- ARG="$1"
49
- case "$ARG" in
50
- --slide-path)
51
- shift
52
- SLIDE_PATH="$1"
53
- if [[ -n "$SLIDE_PATH" ]]; then
54
- # Convert to absolute path
55
- SLIDE_PATH=$(readlink -f "$SLIDE_PATH")
56
- SLIDE_DIR=$(dirname "$SLIDE_PATH")
57
- SLIDE_FILE=$(basename "$SLIDE_PATH")
58
- # Mount the directory containing the slide
59
- VOLUME_MOUNTS+=("-v" "$SLIDE_DIR:/mnt/slides:ro")
60
- # Pass the container path to mosaic
61
- ARGS+=("--slide-path" "/mnt/slides/$SLIDE_FILE")
62
- fi
63
- shift
64
- ;;
65
- --slide-csv)
66
- shift
67
- CSV_PATH="$1"
68
- if [[ -n "$CSV_PATH" ]]; then
69
- # Convert to absolute path
70
- CSV_PATH=$(readlink -f "$CSV_PATH")
71
- CSV_DIR=$(dirname "$CSV_PATH")
72
- CSV_FILE=$(basename "$CSV_PATH")
73
- # Mount the parent directory of CSV (to allow slides in subdirs)
74
- # This allows the CSV to reference slides with relative paths
75
- VOLUME_MOUNTS+=("-v" "$CSV_DIR:/mnt/data:ro")
76
- # Pass the container path to mosaic
77
- ARGS+=("--slide-csv" "/mnt/data/$CSV_FILE")
78
- fi
79
- shift
80
- ;;
81
- --output-dir)
82
- shift
83
- OUTPUT_DIR="$1"
84
- if [[ -n "$OUTPUT_DIR" ]]; then
85
- # Convert to absolute path and create if it doesn't exist
86
- mkdir -p "$OUTPUT_DIR"
87
- OUTPUT_DIR=$(readlink -f "$OUTPUT_DIR")
88
- # Mount the output directory
89
- VOLUME_MOUNTS+=("-v" "$OUTPUT_DIR:/mnt/output")
90
- # Pass the container path to mosaic
91
- ARGS+=("--output-dir" "/mnt/output")
92
- fi
93
- shift
94
- ;;
95
- *)
96
- # Pass through all other arguments
97
- ARGS+=("$ARG")
98
- shift
99
- ;;
100
- esac
 
 
101
  done
102
 
103
  # Remove duplicate volume mounts
104
  UNIQUE_MOUNTS=$(printf '%s\n' "${VOLUME_MOUNTS[@]}" | awk '!seen[$0]++')
105
  VOLUME_ARGS=()
106
  while IFS= read -r mount; do
107
- [[ -n "$mount" ]] && VOLUME_ARGS+=("$mount")
108
- done <<< "$UNIQUE_MOUNTS"
109
 
110
  # Run the Docker container with the mosaic CLI
111
  # Override the entrypoint to run mosaic command instead of gradio_app
112
  GPU_ARGS=()
113
  if [ "$USE_GPU" = true ]; then
114
- GPU_ARGS+=("--gpus=all" "--runtime=nvidia")
115
  fi
116
 
117
  # Get current user ID and group ID to ensure output files have correct ownership
@@ -119,11 +128,14 @@ USER_ID=$(id -u)
119
  GROUP_ID=$(id -g)
120
 
121
  docker run --rm \
122
- "${GPU_ARGS[@]}" \
123
- --user "${USER_ID}:${GROUP_ID}" \
124
- --env HF_TOKEN="${HF_TOKEN}" \
125
- --shm-size=500m \
126
- "${VOLUME_ARGS[@]}" \
127
- --entrypoint mosaic \
128
- "$DOCKER_IMAGE" \
129
- "${ARGS[@]}"
 
 
 
 
20
  # in the same directory tree. Slides can be in subdirectories relative to
21
  # the CSV file location.
22
 
23
+ DOCKER_IMAGE="mskmind/mosaic"
24
  USE_GPU=true
25
+ HF_CACHE_DIR="${HOME}/.cache/huggingface"
26
 
27
+ die() {
28
+ echo "FATAL: $*" >&2
29
+ exit 1
30
+ }
31
 
32
  # Check if Docker is available
33
  command -v docker >/dev/null 2>&1 || die "Docker is not installed or not in PATH"
 
35
  # Check if HF_TOKEN is set
36
  [[ -n $HF_TOKEN ]] || die "HF_TOKEN environment variable is not set. Please set it to your HuggingFace access token."
37
 
38
+ # Create HuggingFace cache directory if it doesn't exist
39
+ mkdir -p "$HF_CACHE_DIR"
40
+
41
  # Check if nvidia runtime is available
42
  if docker info 2>/dev/null | grep -q nvidia; then
43
+ USE_GPU=true
44
  else
45
+ echo "Warning: NVIDIA Docker runtime not detected. Running without GPU support." >&2
46
+ echo " To enable GPU support, install the NVIDIA Container Toolkit." >&2
47
+ USE_GPU=false
48
  fi
49
 
50
  # Parse arguments to identify paths that need to be mounted
 
52
  ARGS=()
53
 
54
  while [[ $# -gt 0 ]]; do
55
+ ARG="$1"
56
+ case "$ARG" in
57
+ --slide-path)
58
+ shift
59
+ SLIDE_PATH="$1"
60
+ if [[ -n "$SLIDE_PATH" ]]; then
61
+ # Convert to absolute path
62
+ SLIDE_PATH=$(readlink -f "$SLIDE_PATH")
63
+ SLIDE_DIR=$(dirname "$SLIDE_PATH")
64
+ SLIDE_FILE=$(basename "$SLIDE_PATH")
65
+ # Mount the directory containing the slide
66
+ VOLUME_MOUNTS+=("-v" "$SLIDE_DIR:/mnt/slides:ro")
67
+ # Pass the container path to mosaic
68
+ ARGS+=("--slide-path" "/mnt/slides/$SLIDE_FILE")
69
+ fi
70
+ shift
71
+ ;;
72
+ --slide-csv)
73
+ shift
74
+ CSV_PATH="$1"
75
+ if [[ -n "$CSV_PATH" ]]; then
76
+ # Convert to absolute path
77
+ CSV_PATH=$(readlink -f "$CSV_PATH")
78
+ CSV_DIR=$(dirname "$CSV_PATH")
79
+ CSV_FILE=$(basename "$CSV_PATH")
80
+ # Mount the parent directory of CSV (to allow slides in subdirs)
81
+ # This allows the CSV to reference slides with relative paths
82
+ VOLUME_MOUNTS+=("-v" "$CSV_DIR:/mnt/data:ro")
83
+ # Pass the container path to mosaic
84
+ ARGS+=("--slide-csv" "/mnt/data/$CSV_FILE")
85
+ fi
86
+ shift
87
+ ;;
88
+ --output-dir)
89
+ shift
90
+ OUTPUT_DIR="$1"
91
+ if [[ -n "$OUTPUT_DIR" ]]; then
92
+ # Convert to absolute path and create if it doesn't exist
93
+ mkdir -p "$OUTPUT_DIR"
94
+ OUTPUT_DIR=$(readlink -f "$OUTPUT_DIR")
95
+ # Ensure directory is writable by current user
96
+ chmod u+rwx "$OUTPUT_DIR" 2>/dev/null || true
97
+ # Mount the output directory
98
+ VOLUME_MOUNTS+=("-v" "$OUTPUT_DIR:/mnt/output")
99
+ # Pass the container path to mosaic
100
+ ARGS+=("--output-dir" "/mnt/output")
101
+ fi
102
+ shift
103
+ ;;
104
+ *)
105
+ # Pass through all other arguments
106
+ ARGS+=("$ARG")
107
+ shift
108
+ ;;
109
+ esac
110
  done
111
 
112
  # Remove duplicate volume mounts
113
  UNIQUE_MOUNTS=$(printf '%s\n' "${VOLUME_MOUNTS[@]}" | awk '!seen[$0]++')
114
  VOLUME_ARGS=()
115
  while IFS= read -r mount; do
116
+ [[ -n "$mount" ]] && VOLUME_ARGS+=("$mount")
117
+ done <<<"$UNIQUE_MOUNTS"
118
 
119
  # Run the Docker container with the mosaic CLI
120
  # Override the entrypoint to run mosaic command instead of gradio_app
121
  GPU_ARGS=()
122
  if [ "$USE_GPU" = true ]; then
123
+ GPU_ARGS+=("--gpus=all" "--runtime=nvidia")
124
  fi
125
 
126
  # Get current user ID and group ID to ensure output files have correct ownership
 
128
  GROUP_ID=$(id -g)
129
 
130
  docker run --rm \
131
+ "${GPU_ARGS[@]}" \
132
+ --user "${USER_ID}:${GROUP_ID}" \
133
+ --env HF_TOKEN="${HF_TOKEN}" \
134
+ --env HF_HOME=/mnt/hf_cache \
135
+ --env TRANSFORMERS_CACHE=/mnt/hf_cache \
136
+ --shm-size=3G \
137
+ -v "${HF_CACHE_DIR}:/mnt/hf_cache" \
138
+ "${VOLUME_ARGS[@]}" \
139
+ --entrypoint mosaic \
140
+ "$DOCKER_IMAGE" \
141
+ "${ARGS[@]}"