#!/bin/bash
#
# Startup script for the explorer app.
#
# Steps, in order:
#   1. Download the Hugging Face dataset snapshot into $DATA_DIR.
#   2. Unpack any thumbnail tarballs found there.
#   3. Check that required files exist and print a short inventory.
#   4. Build optional CLI flags from what is on disk / in the environment.
#   5. Replace this shell with `bokeh serve scripts/explorer` on port 7860.
#
# Environment variables read by this script:
#   HF_DATASET_REPO       (required) dataset repo id passed to snapshot_download
#   HF_TOKEN              (optional) token passed to snapshot_download
#   DYNADIFF_MODAL_URL    (optional) enables the DynaDiff Modal endpoint flags
#   DYNADIFF_MODAL_TOKEN  (optional) only used when DYNADIFF_MODAL_URL is set
#   SAE_PATH              (optional) defaults to $DATA_DIR/sae.pth
#   EDITABLE_EXAMPLES     (optional) "1" adds --editable-examples
#   SPACE_HOST            (optional) websocket origin, defaults to "localhost"

# `-u` is intentionally left off: several optional-flag arrays below may be
# empty, and expanding an empty array under `set -u` aborts on bash < 4.4.
set -e

# Fail early with a readable message instead of a Python KeyError traceback.
: "${HF_DATASET_REPO:?HF_DATASET_REPO must be set to the HF dataset repo id}"

DATA_DIR=/app/data
mkdir -p "$DATA_DIR"

# Use one interpreter for every embedded Python snippet. `python` is tried
# first because the download step (which needs huggingface_hub) historically
# ran under that name; `python3` is the fallback when `python` is absent.
if ! PYTHON_BIN="$(command -v python || command -v python3)"; then
  echo "ERROR: no python interpreter found on PATH" >&2
  exit 1
fi

# ── Download all data from HF Dataset repo ───────────────────────────────────
echo "Downloading data from HF Dataset: ${HF_DATASET_REPO}"
# The target directory is passed as argv[1] so DATA_DIR stays the single
# source of truth for the path.
"$PYTHON_BIN" - "$DATA_DIR" <<'PYEOF'
import os
import sys

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["HF_DATASET_REPO"],
    repo_type="dataset",
    local_dir=sys.argv[1],
    local_dir_use_symlinks=False,
    token=os.environ.get("HF_TOKEN"),
)
PYEOF
echo "Download complete."

# ── Extract thumbnail archives if present ────────────────────────────────────
# An archive is unpacked only when the directory named after it does not exist
# yet; the tarball is deleted after a successful extraction.
for archive in nsd_thumbs.tar.gz coco_thumbs.tar.gz union_mei_thumbs.tar.gz; do
  path="$DATA_DIR/$archive"
  dir="$DATA_DIR/${archive%.tar.gz}"
  if [ -f "$path" ] && [ ! -d "$dir" ]; then
    echo "Extracting $archive into $DATA_DIR ..."
    tar -xzf "$path" -C "$DATA_DIR" --no-same-owner
    # Informational count only (non-hidden top-level entries); a missing
    # directory simply reports 0.
    extracted="$(find "$dir" -mindepth 1 -maxdepth 1 ! -name '.*' 2>/dev/null | wc -l)"
    echo " Extracted $extracted files into $dir"
    rm -f -- "$path"
  fi
done

# ── Verify key files ─────────────────────────────────────────────────────────
# Exits non-zero (and, via `set -e`, stops this script) if a required file is
# missing; otherwise prints the top two directory levels of the data dir.
"$PYTHON_BIN" - "$DATA_DIR" <<'PYEOF'
import os
import sys

data_dir = sys.argv[1]

required = [
    os.path.join(data_dir, "explorer_data_union_dinov2_l11.pt"),
]
missing = [p for p in required if not os.path.isfile(p)]
if missing:
    print("ERROR: missing required files:")
    for p in missing:
        print(f" {p}")
    sys.exit(1)

# Report what we have
for root, dirs, files in os.walk(data_dir):
    depth = root[len(data_dir):].count(os.sep)
    if depth >= 1:
        # Only depth 0 and 1 are reported, so do not descend any further
        # (avoids walking every file under the thumbnail directories).
        dirs[:] = []
    indent = " " * depth
    print(f"{indent}{os.path.basename(root)}/")
    for f in sorted(files)[:10]:
        size = os.path.getsize(os.path.join(root, f))
        print(f"{indent} {f} ({size//1024//1024} MB)")
    if len(files) > 10:
        print(f"{indent} ... and {len(files)-10} more files")

print("All required files present.")
PYEOF

# ── Key paths ────────────────────────────────────────────────────────────────
EXPLORER_DATA="$DATA_DIR/explorer_data_union_dinov2_l11.pt"
PHI_DIR="$DATA_DIR/phis"
NSD_THUMBS="$DATA_DIR/nsd_thumbs"
MEI_THUMBS="$DATA_DIR/union_mei_thumbs"

# Accept either coco_thumbs (extracted from tar) or hf_coco_thumbs (uploaded
# directly). A non-empty coco_thumbs wins; otherwise hf_coco_thumbs if it
# exists; otherwise fall back to the coco_thumbs path (created further down).
if [ -d "$DATA_DIR/coco_thumbs" ] \
  && [ -n "$(ls -A "$DATA_DIR/coco_thumbs" 2>/dev/null)" ]; then
  COCO_THUMBS="$DATA_DIR/coco_thumbs"
elif [ -d "$DATA_DIR/hf_coco_thumbs" ]; then
  COCO_THUMBS="$DATA_DIR/hf_coco_thumbs"
else
  COCO_THUMBS="$DATA_DIR/coco_thumbs"
fi

# ── Build optional arg flags ─────────────────────────────────────────────────
PHI_ARGS=()
if [ -d "$PHI_DIR" ]; then
  PHI_ARGS+=(--phi-dir "$PHI_DIR")
  PHI_ARGS+=(--phi-model "dinov2_sae")
fi

# ── DynaDiff: Modal endpoint mode ────────────────────────────────────────────
# Set DYNADIFF_MODAL_URL and DYNADIFF_MODAL_TOKEN in HF Spaces secrets.
# No checkpoint or H5 file needed here — inference runs on Modal's GPU.
# NOTE(review): the token is handed over as a command-line argument, which is
# visible in the process list; consider an env/file based option if the
# explorer supports one.
MODAL_ARGS=()
if [ -n "${DYNADIFF_MODAL_URL:-}" ]; then
  MODAL_ARGS+=(--dynadiff-modal-url "$DYNADIFF_MODAL_URL")
  if [ -n "${DYNADIFF_MODAL_TOKEN:-}" ]; then
    MODAL_ARGS+=(--dynadiff-modal-token "$DYNADIFF_MODAL_TOKEN")
  fi
  echo " DynaDiff: Modal endpoint ($DYNADIFF_MODAL_URL)"
else
  echo " DynaDiff: disabled (DYNADIFF_MODAL_URL not set)"
fi

# --image-dir is required; create empty fallback if missing
if [ ! -d "$COCO_THUMBS" ]; then
  echo "WARNING: no COCO thumbnails found, creating empty placeholder" >&2
  mkdir -p "$COCO_THUMBS"
fi
IMAGE_DIR_ARG=(--image-dir "$COCO_THUMBS")

# Extra image dirs: NSD thumbs + MEI thumbnail archive (if present)
EXTRA_IMAGE_ARGS=()
if [ -d "$NSD_THUMBS" ]; then
  EXTRA_IMAGE_ARGS+=(--extra-image-dir "$NSD_THUMBS")
fi
if [ -d "$MEI_THUMBS" ]; then
  EXTRA_IMAGE_ARGS+=(--extra-image-dir "$MEI_THUMBS")
fi

# Brain thumbnails for DynaDiff GT images
BRAIN_THUMB_ARGS=()
if [ -d "$NSD_THUMBS" ]; then
  BRAIN_THUMB_ARGS+=(--brain-thumbnails "$NSD_THUMBS")
fi

# SAE weights for live patch inference (CPU)
SAE_ARGS=()
SAE_PATH="${SAE_PATH:-$DATA_DIR/sae.pth}"
if [ -f "$SAE_PATH" ]; then
  SAE_ARGS+=(--sae-path "$SAE_PATH")
  echo " SAE: $SAE_PATH (patch inference enabled)"
else
  echo " SAE: not found at $SAE_PATH (patch inference disabled)"
fi

# Example presets
EXAMPLES_ARGS=()
EXAMPLES_PATH="$DATA_DIR/example_presets.json"
if [ -f "$EXAMPLES_PATH" ]; then
  EXAMPLES_ARGS+=(--examples-file "$EXAMPLES_PATH")
  if [ "${EDITABLE_EXAMPLES:-0}" = "1" ]; then
    EXAMPLES_ARGS+=(--editable-examples)
    echo " Examples: $EXAMPLES_PATH (editable)"
  else
    echo " Examples: $EXAMPLES_PATH (read-only)"
  fi
fi

# ── Determine websocket origin ───────────────────────────────────────────────
SPACE_HOST="${SPACE_HOST:-localhost}"

# ── Launch Bokeh server ──────────────────────────────────────────────────────
echo "Starting Bokeh server on port 7860..."
echo " Primary data: $EXPLORER_DATA"
echo " COCO thumbs: $COCO_THUMBS (exists: $([ -d "$COCO_THUMBS" ] && echo yes || echo no))"
echo " NSD thumbs: $NSD_THUMBS (exists: $([ -d "$NSD_THUMBS" ] && echo yes || echo no))"
echo " MEI thumbs: $MEI_THUMBS (exists: $([ -d "$MEI_THUMBS" ] && echo yes || echo no))"
echo " Phi dir: $PHI_DIR (exists: $([ -d "$PHI_DIR" ] && echo yes || echo no))"

cd /app

# `exec` replaces this shell with the server process, so termination signals
# are delivered to bokeh itself and its exit status becomes the script's.
exec bokeh serve scripts/explorer \
  --port 7860 \
  --allow-websocket-origin="${SPACE_HOST}" \
  --allow-websocket-origin="localhost:7860" \
  --session-token-expiration 86400 \
  --args \
  --data "$EXPLORER_DATA" \
  --primary-label "DINOv2 L11 Spatial — Union (d=10K)" \
  "${IMAGE_DIR_ARG[@]}" \
  "${EXTRA_IMAGE_ARGS[@]}" \
  "${PHI_ARGS[@]}" \
  "${BRAIN_THUMB_ARGS[@]}" \
  "${SAE_ARGS[@]}" \
  "${MODAL_ARGS[@]}" \
  "${EXAMPLES_ARGS[@]}"