Marlin Lee
Add Save as Preset button (--editable-examples flag); wire examples file in entrypoint
b49845a
#!/bin/bash
# Entrypoint script: downloads the explorer data from a Hugging Face dataset
# repo, unpacks thumbnail archives, checks that required files exist, and then
# starts the Bokeh explorer app on port 7860.
#
# Environment variables read by this script:
#   HF_DATASET_REPO       dataset repo id passed to snapshot_download
#   HF_TOKEN              optional token passed to snapshot_download
#   DYNADIFF_MODAL_URL    optional; when set, DynaDiff Modal flags are added
#   DYNADIFF_MODAL_TOKEN  optional; forwarded only when the URL is set
#   SAE_PATH              optional; defaults to $DATA_DIR/sae.pth
#   EDITABLE_EXAMPLES     "1" adds --editable-examples if a presets file exists
#   SPACE_HOST            allowed websocket origin; defaults to localhost

# Abort the script as soon as a command exits with a non-zero status.
set -e
# Directory that receives the downloaded dataset and the extracted archives.
DATA_DIR=/app/data
mkdir -p "$DATA_DIR"
# ── Download all data from HF Dataset repo ───────────────────────────────────
# Fail fast with a readable message (instead of a Python KeyError traceback
# after a misleading "Downloading..." line) when the repo id is not configured.
: "${HF_DATASET_REPO:?HF_DATASET_REPO must be set to the HF dataset repo id}"
echo "Downloading data from HF Dataset: ${HF_DATASET_REPO}"
# The heredoc delimiter is quoted, so the shell expands nothing inside it.
# The target directory is therefore handed to Python through the environment,
# which keeps DATA_DIR the single place where the data path is defined.
DATA_DIR="$DATA_DIR" python - <<'PYEOF'
import os
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["HF_DATASET_REPO"],
    repo_type="dataset",
    local_dir=os.environ["DATA_DIR"],
    # NOTE(review): newer huggingface_hub releases may deprecate this
    # argument; confirm against the installed version.
    local_dir_use_symlinks=False,
    # HF_TOKEN is optional; token is None when the variable is unset.
    token=os.environ.get("HF_TOKEN"),
)
PYEOF
echo "Download complete."
# ── Extract thumbnail archives if present ────────────────────────────────────
# For every known archive name, the matching directory is the archive name
# without its .tar.gz suffix. An archive is unpacked only when the archive
# file exists and that directory does not exist yet.
for tarball in nsd_thumbs.tar.gz coco_thumbs.tar.gz union_mei_thumbs.tar.gz; do
  src="$DATA_DIR/$tarball"
  dest="$DATA_DIR/${tarball%.tar.gz}"
  # Skip archives that are absent or whose directory is already there.
  [[ -f "$src" ]] || continue
  [[ ! -d "$dest" ]] || continue
  echo "Extracting $tarball into $DATA_DIR ..."
  tar -xzf "$src" -C "$DATA_DIR" --no-same-owner
  echo " Extracted $(ls "$dest" 2>/dev/null | wc -l) files into $dest"
  # The archive file is deleted once it has been unpacked.
  rm -f "$src"
done
# ── Verify key files ──────────────────────────────────────────────────────────
# Exits non-zero (which aborts this script under `set -e`) when a required
# file is missing; otherwise prints a shallow listing of the data directory.
# DATA_DIR is passed through the environment because the quoted heredoc
# delimiter disables shell expansion inside the Python source.
DATA_DIR="$DATA_DIR" python3 - <<'PYEOF'
import os
import sys

data_dir = os.environ["DATA_DIR"]
required = [
    os.path.join(data_dir, "explorer_data_union_dinov2_l11.pt"),
]
missing = [p for p in required if not os.path.isfile(p)]
if missing:
    # Diagnostics go to stderr so they are not mixed into regular output.
    print("ERROR: missing required files:", file=sys.stderr)
    for p in missing:
        print(f" {p}", file=sys.stderr)
    sys.exit(1)

# Report what we have: the data directory itself plus its immediate
# subdirectories, showing at most 10 files (sorted by name) for each.
for root, dirs, files in os.walk(data_dir):
    depth = root[len(data_dir):].count(os.sep)
    if depth >= 1:
        # Nothing below depth 1 is ever printed, so do not descend further.
        dirs[:] = []
    if depth <= 1:
        indent = " " * depth
        print(f"{indent}{os.path.basename(root)}/")
        for f in sorted(files)[:10]:
            size = os.path.getsize(os.path.join(root, f))
            print(f"{indent} {f} ({size//1024//1024} MB)")
        if len(files) > 10:
            print(f"{indent} ... and {len(files)-10} more files")
print("All required files present.")
PYEOF
# ── Key paths ─────────────────────────────────────────────────────────────────
# Fixed locations underneath the data directory.
EXPLORER_DATA="$DATA_DIR/explorer_data_union_dinov2_l11.pt"
PHI_DIR="$DATA_DIR/phis"
NSD_THUMBS="$DATA_DIR/nsd_thumbs"
MEI_THUMBS="$DATA_DIR/union_mei_thumbs"
# COCO thumbnails: coco_thumbs (extracted from tar) is the default. Switch to
# hf_coco_thumbs (uploaded directly) only when coco_thumbs is missing or empty
# and hf_coco_thumbs exists.
COCO_THUMBS="$DATA_DIR/coco_thumbs"
if [[ ! -d "$COCO_THUMBS" || -z "$(ls -A "$COCO_THUMBS" 2>/dev/null)" ]]; then
  if [[ -d "$DATA_DIR/hf_coco_thumbs" ]]; then
    COCO_THUMBS="$DATA_DIR/hf_coco_thumbs"
  fi
fi
# ── Build optional arg flags ──────────────────────────────────────────────────
# The phi flags are passed only when the phi directory exists; otherwise the
# array stays empty and contributes no arguments to the launch command.
PHI_ARGS=()
if [[ -d "$PHI_DIR" ]]; then
  PHI_ARGS+=(--phi-dir "$PHI_DIR" --phi-model "dinov2_sae")
fi
# ── DynaDiff: Modal endpoint mode ────────────────────────────────────────────
# Set DYNADIFF_MODAL_URL and DYNADIFF_MODAL_TOKEN in HF Spaces secrets.
# No checkpoint or H5 file needed here — inference runs on Modal's GPU.
# The token flag is added only when a URL is configured and the token is
# non-empty; without a URL no DynaDiff flags are passed at all.
MODAL_ARGS=()
if [[ -z "${DYNADIFF_MODAL_URL:-}" ]]; then
  echo " DynaDiff: disabled (DYNADIFF_MODAL_URL not set)"
else
  MODAL_ARGS+=(--dynadiff-modal-url "$DYNADIFF_MODAL_URL")
  [[ -z "${DYNADIFF_MODAL_TOKEN:-}" ]] || MODAL_ARGS+=(--dynadiff-modal-token "$DYNADIFF_MODAL_TOKEN")
  echo " DynaDiff: Modal endpoint ($DYNADIFF_MODAL_URL)"
fi
# --image-dir is required, so when the chosen COCO thumbnail directory does
# not exist an empty one is created in its place and a warning is printed.
[[ -d "$COCO_THUMBS" ]] || {
  echo "WARNING: no COCO thumbnails found, creating empty placeholder"
  mkdir -p "$COCO_THUMBS"
}
IMAGE_DIR_ARG=(--image-dir "$COCO_THUMBS")
# Extra image dirs: NSD thumbs + MEI thumbnail archive (if present).
# Brain thumbnails for DynaDiff GT images reuse the NSD thumbnail directory.
EXTRA_IMAGE_ARGS=()
BRAIN_THUMB_ARGS=()
if [[ -d "$NSD_THUMBS" ]]; then
  EXTRA_IMAGE_ARGS+=(--extra-image-dir "$NSD_THUMBS")
  BRAIN_THUMB_ARGS+=(--brain-thumbnails "$NSD_THUMBS")
fi
if [[ -d "$MEI_THUMBS" ]]; then
  EXTRA_IMAGE_ARGS+=(--extra-image-dir "$MEI_THUMBS")
fi
# SAE weights for live patch inference (CPU).
# SAE_PATH may be supplied by the environment; otherwise it defaults to
# sae.pth inside the data directory. The flag is passed only if the file exists.
SAE_ARGS=()
SAE_PATH="${SAE_PATH:-$DATA_DIR/sae.pth}"
if [[ ! -f "$SAE_PATH" ]]; then
  echo " SAE: not found at $SAE_PATH (patch inference disabled)"
else
  SAE_ARGS+=(--sae-path "$SAE_PATH")
  echo " SAE: $SAE_PATH (patch inference enabled)"
fi
# Example presets.
# The presets file is passed only when it exists. EDITABLE_EXAMPLES=1
# additionally adds --editable-examples; any other value leaves it read-only.
EXAMPLES_ARGS=()
EXAMPLES_PATH="$DATA_DIR/example_presets.json"
if [[ -f "$EXAMPLES_PATH" ]]; then
  EXAMPLES_ARGS+=(--examples-file "$EXAMPLES_PATH")
  case "${EDITABLE_EXAMPLES:-0}" in
    1)
      EXAMPLES_ARGS+=(--editable-examples)
      echo " Examples: $EXAMPLES_PATH (editable)"
      ;;
    *)
      echo " Examples: $EXAMPLES_PATH (read-only)"
      ;;
  esac
fi
# ── Determine websocket origin ────────────────────────────────────────────────
# SPACE_HOST falls back to localhost when the environment does not provide it.
SPACE_HOST="${SPACE_HOST:-localhost}"

# Prints "yes" when $1 is an existing directory and "no" otherwise.
_dir_exists_label() {
  if [[ -d "$1" ]]; then
    echo yes
  else
    echo no
  fi
}

# ── Launch Bokeh server ───────────────────────────────────────────────────────
echo "Starting Bokeh server on port 7860..."
echo " Primary data: $EXPLORER_DATA"
echo " COCO thumbs: $COCO_THUMBS (exists: $(_dir_exists_label "$COCO_THUMBS"))"
echo " NSD thumbs: $NSD_THUMBS (exists: $(_dir_exists_label "$NSD_THUMBS"))"
echo " MEI thumbs: $MEI_THUMBS (exists: $(_dir_exists_label "$MEI_THUMBS"))"
echo " Phi dir: $PHI_DIR (exists: $(_dir_exists_label "$PHI_DIR"))"
cd /app
# NOTE(review): the --primary-label text below contains "β€”", which looks like
# a mis-decoded em dash. It is left untouched here because the label may be
# matched elsewhere (e.g. saved presets); confirm before correcting it.
#
# `exec` replaces this shell with the Bokeh process, so signals delivered to
# the entrypoint (such as SIGTERM on shutdown) reach the server directly
# instead of stopping at an intermediate bash process.
# Arrays that were left empty above expand to no arguments at all.
exec bokeh serve scripts/explorer \
  --port 7860 \
  --allow-websocket-origin="${SPACE_HOST}" \
  --allow-websocket-origin="localhost:7860" \
  --session-token-expiration 86400 \
  --args \
  --data "$EXPLORER_DATA" \
  --primary-label "DINOv2 L11 Spatial β€” Union (d=10K)" \
  "${IMAGE_DIR_ARG[@]}" \
  "${EXTRA_IMAGE_ARGS[@]}" \
  "${PHI_ARGS[@]}" \
  "${BRAIN_THUMB_ARGS[@]}" \
  "${SAE_ARGS[@]}" \
  "${MODAL_ARGS[@]}" \
  "${EXAMPLES_ARGS[@]}"