#!/usr/bin/env sh
# Entrypoint for the personal HF Space (msradam/riprap-nyc) on L4.
#
# Boots three things in order:
# 1. Ollama serve -> granite4.1:8b on localhost:11434
# 2. riprap-models -> Prithvi/TerraMind/TTM/GLiNER/Embedding on :7861
# 3. web.main -> FastAPI + SSE on :7860 (HF Spaces public port)
#
# The 8B is baked into the image (see Dockerfile.l4); the EO toolchain
# (terratorch + deps) installs at runtime to keep the build sandbox
# under its disk threshold. The install takes ~2 minutes on the first
# cold start and is cached thereafter for the lifetime of the image.
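#
# Quick manual smoke test once all three are up (sketch; the :11434 root
# probe and the :7861 /healthz route are the same checks the wait loops
# below use, while a route at / on :7860 is an assumption about web.main):
#   curl -s http://127.0.0.1:11434/
#   curl -s http://127.0.0.1:7861/healthz
#   curl -s http://127.0.0.1:7860/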
set -e
# --- 1. EO toolchain (runtime-installed; same pattern as the canonical
# entrypoint.sh) -------------------------------------------------
EO_DIR="$HOME/.eo-pkgs"
EO_MARKER="$EO_DIR/.installed"
if [ ! -f "$EO_MARKER" ]; then
  echo "[entrypoint.l4] installing EO toolchain into $EO_DIR ..."
  mkdir -p "$EO_DIR"
  # torchvision is now baked into the base image (Dockerfile.l4) so
  # don't re-install it here; the EO_DIR shadowing copy was the
  # source of the `torchvision::nms does not exist` runtime error.
  if pip install --no-cache-dir --no-deps --target="$EO_DIR" \
      terratorch==1.1rc6 einops diffusers timm; then
    if PYTHONPATH="$EO_DIR:$PYTHONPATH" python -c "
import terratorch
import terratorch.models.backbones.terramind.model.terramind_register
from terratorch.registry import FULL_MODEL_REGISTRY
n = len([k for k in FULL_MODEL_REGISTRY if 'terramind' in k.lower()])
assert n > 0
print(f'[entrypoint.l4] terratorch ok ({n} terramind entries)')
"; then
      touch "$EO_MARKER"
      echo "[entrypoint.l4] EO toolchain READY"
    else
      echo "[entrypoint.l4] EO verify FAILED; Prithvi/TerraMind probes will skip"
    fi
  else
    echo "[entrypoint.l4] pip install FAILED; Prithvi/TerraMind probes will skip"
  fi
else
  echo "[entrypoint.l4] EO toolchain already installed (cached)"
fi
export PYTHONPATH="$EO_DIR:$PYTHONPATH"
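# If a package ever looks like it is being resolved from the wrong place
# again, a quick way to see which copy wins with EO_DIR on PYTHONPATH
# (illustrative check, not part of the boot path):
#   PYTHONPATH="$EO_DIR:$PYTHONPATH" python -c "import torchvision, terratorch; print(torchvision.__file__); print(terratorch.__file__)"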
# --- 2. Ollama serve --------------------------------------------------
LOG_OLLAMA="$HOME/ollama.log"
ollama serve 2>&1 | tee "$LOG_OLLAMA" &
OLLAMA_PID=$!
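# Note: $! here is the PID of the last stage of the pipeline (tee), not of
# ollama itself; it still works as a liveness proxy because tee exits once
# the ollama end of the pipe closes.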
for i in $(seq 1 60); do
  if curl -sf http://127.0.0.1:11434/ > /dev/null 2>&1; then
    echo "[entrypoint.l4] ollama up (pid $OLLAMA_PID) after ${i}s"
    break
  fi
  if ! kill -0 "$OLLAMA_PID" 2>/dev/null; then
    echo "[entrypoint.l4] FATAL: ollama serve died"
    tail -40 "$LOG_OLLAMA" || true
    exit 1
  fi
  sleep 1
done
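# If the 60s window elapses without a healthy response, the script falls
# through and continues anyway; the warmup request below will then surface
# the failure as a WARNING rather than aborting the boot.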
# Granite 4.1:8b is baked. Pre-warm into VRAM so the first reconcile
# doesn't pay the ~30s model-load tax.
echo "[entrypoint.l4] pre-warming granite4.1:8b into VRAM ..."
curl -s -X POST http://127.0.0.1:11434/api/generate \
  -d '{"model":"granite4.1:8b","prompt":"hi","stream":false,"keep_alive":"24h","options":{"num_predict":1}}' \
  -o /dev/null --max-time 120 \
  && echo "[entrypoint.l4] granite warm" \
  || echo "[entrypoint.l4] WARNING: granite warmup failed (will load lazily)"
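# To confirm the model is actually resident after the warmup (manual check;
# exact output depends on the Ollama version):
#   ollama ps
#   curl -s http://127.0.0.1:11434/api/tags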
# --- 3. riprap-models on :7861 ---------------------------------------
# Same FastAPI app the AMD droplet runs, just rehosted in-process here
# so app/inference.py's RIPRAP_ML_BASE_URL points at localhost.
LOG_MODELS="$HOME/riprap-models.log"
uvicorn riprap_models:app --host 127.0.0.1 --port 7861 --log-level info \
  > "$LOG_MODELS" 2>&1 &
MODELS_PID=$!
for i in $(seq 1 60); do
  if curl -sf http://127.0.0.1:7861/healthz > /dev/null 2>&1; then
    echo "[entrypoint.l4] riprap-models up (pid $MODELS_PID) after ${i}s"
    break
  fi
  if ! kill -0 "$MODELS_PID" 2>/dev/null; then
    echo "[entrypoint.l4] FATAL: riprap-models died"
    tail -40 "$LOG_MODELS" || true
    exit 1
  fi
  sleep 1
done
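# If app/inference.py resolves the model service through an environment
# variable named RIPRAP_ML_BASE_URL (an assumption; the comment above only
# names the setting), pointing it at this in-process port would amount to:
#   export RIPRAP_ML_BASE_URL="http://127.0.0.1:7861"
# Useful while debugging: curl -s http://127.0.0.1:7861/healthz and
# tail -f "$HOME/riprap-models.log".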
# --- GPU sanity --------------------------------------------------------
if command -v nvidia-smi > /dev/null 2>&1; then
  echo "[entrypoint.l4] nvidia-smi:"
  nvidia-smi -L || true
else
  echo "[entrypoint.l4] WARNING: nvidia-smi missing; running on CPU"
fi
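# nvidia-smi only proves the driver is visible. A deeper check on the
# PyTorch side (sketch; assumes torch is present in the image, which the
# baked models imply) would be:
#   python -c "import torch; print(torch.cuda.is_available())"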
# --- 4. Web app (foreground) -----------------------------------------
exec uvicorn web.main:app --host 0.0.0.0 --port 7860 --log-level info