#!/usr/bin/env sh
# Inference Space entrypoint: Ollama + riprap-models + FastAPI proxy.
#
# Start-up order:
#   0. install (once) and verify the EO toolchain under $HOME/.eo-pkgs
#   1. start `ollama serve`, wait for it, pre-warm granite4.1:8b
#   2. start riprap-models (uvicorn) on 127.0.0.1:7861 and wait for it
#   3. print the GPU list when nvidia-smi is available
#   4. exec the FastAPI proxy (uvicorn) on 0.0.0.0:7860 in the foreground
set -e

#######################################
# Poll an HTTP endpoint once per second until it answers.
# Arguments:
#   $1 - URL probed with `curl -sf`
#   $2 - PID whose liveness is checked with `kill -0` between probes
#   $3 - service name used in log messages
#   $4 - log file whose last 40 lines are printed if the PID is gone
#   $5 - maximum number of attempts (optional, default 60)
# Outputs:
#   progress / failure messages on stdout
# Returns:
#   0 when the endpoint answered, and also when every attempt was used up
#   (a WARN is logged and start-up continues); exits the script with
#   status 1 if the PID disappears while waiting.
#######################################
await_service() {
  # Plain `sh` has no portable `local`, hence the prefixed names.
  _as_url=$1
  _as_pid=$2
  _as_name=$3
  _as_log=$4
  _as_max=${5:-60}
  _as_try=1

  while [ "$_as_try" -le "$_as_max" ]; do
    if curl -sf "$_as_url" > /dev/null 2>&1; then
      echo "[entrypoint.inf] $_as_name up after ${_as_try}s"
      return 0
    fi
    if ! kill -0 "$_as_pid" 2>/dev/null; then
      echo "[entrypoint.inf] FATAL: $_as_name died"
      tail -40 "$_as_log" || true
      exit 1
    fi
    sleep 1
    _as_try=$((_as_try + 1))
  done

  # Previously the loop simply ended here without any message.
  echo "[entrypoint.inf] WARN: $_as_name not ready after $_as_max attempts (continuing)"
  return 0
}

# --- 0. EO toolchain (terratorch + Sentinel-2 chain). Runtime-installed
#        because the build sandbox is too tight to fit it next to
#        Granite weights. ---------------------------------------------
EO_DIR="$HOME/.eo-pkgs"
EO_MARKER="$EO_DIR/.installed"
# Prepend EO_DIR; only add the ':' separator when PYTHONPATH already has
# a value, so no empty path entry is produced.
EO_PYTHONPATH="$EO_DIR${PYTHONPATH:+:$PYTHONPATH}"

if [ ! -f "$EO_MARKER" ]; then
  echo "[entrypoint.inf] installing EO toolchain into $EO_DIR ..."
  mkdir -p "$EO_DIR"
  if pip install --no-cache-dir --no-deps --target="$EO_DIR" \
      terratorch==1.1rc6 einops diffusers timm; then
    # Import check: the marker file is only written when at least one
    # registry key containing 'terramind' is present.
    if PYTHONPATH="$EO_PYTHONPATH" python -c "
import terratorch
import terratorch.models.backbones.terramind.model.terramind_register
from terratorch.registry import FULL_MODEL_REGISTRY
n = len([k for k in FULL_MODEL_REGISTRY if 'terramind' in k.lower()])
assert n > 0
print(f'[entrypoint.inf] terratorch ok ({n} terramind entries)')
"; then
      touch "$EO_MARKER"
      echo "[entrypoint.inf] EO toolchain READY"
    else
      echo "[entrypoint.inf] EO verify FAILED — TerraMind probes will skip"
    fi
  else
    echo "[entrypoint.inf] pip install FAILED — TerraMind probes will skip"
  fi
else
  echo "[entrypoint.inf] EO toolchain cached"
fi
export PYTHONPATH="$EO_PYTHONPATH"

# --- 1. Ollama (Granite 4.1 baked into the image, just serve them) ---
LOG_OLLAMA="$HOME/ollama.log"
ollama serve 2>&1 | tee "$LOG_OLLAMA" &
# NOTE(review): for a background pipeline `$!` is the PID of the last
# stage, i.e. `tee`, not `ollama`. The liveness check below therefore
# watches tee, which presumably exits once ollama closes its output.
OLLAMA_PID=$!
await_service http://127.0.0.1:11434/ "$OLLAMA_PID" ollama "$LOG_OLLAMA"

# Pre-warm 8B into VRAM (24h keep-alive). 3B will lazy-load on first
# planner call.
echo "[entrypoint.inf] pre-warming granite4.1:8b ..."
# -f makes HTTP error responses count as failure, so the WARN branch is
# taken instead of reporting a warm model.
if curl -sf -X POST http://127.0.0.1:11434/api/generate \
    -d '{"model":"granite4.1:8b","prompt":"hi","stream":false,"keep_alive":"24h","options":{"num_predict":1}}' \
    -o /dev/null --max-time 120; then
  echo "[entrypoint.inf] 8b warm"
else
  echo "[entrypoint.inf] WARN: 8b warmup failed (will load lazily)"
fi

# --- 2. riprap-models on :7861 ---------------------------------------
LOG_MODELS="$HOME/riprap-models.log"
uvicorn riprap_models:app --host 127.0.0.1 --port 7861 --log-level info \
  > "$LOG_MODELS" 2>&1 &
MODELS_PID=$!
await_service http://127.0.0.1:7861/healthz "$MODELS_PID" riprap-models "$LOG_MODELS"

# --- 3. GPU sanity ---------------------------------------------------
if command -v nvidia-smi > /dev/null 2>&1; then
  # Informational only; a failing nvidia-smi must not abort start-up.
  nvidia-smi -L || true
fi

# --- 4. FastAPI bearer-auth proxy on :7860 (foreground) -------------
# exec replaces this shell, so uvicorn becomes the foreground process.
exec uvicorn proxy:app --host 0.0.0.0 --port 7860 --log-level info