#!/bin/bash
# =============================================================================
# HomePilot — HF Spaces Startup Script
# =============================================================================
# Starts Ollama (sidecar) + HomePilot (FastAPI + React frontend) in a single
# container. Chata personas are auto-imported on first run.
#
# Production-grade behaviours:
#   - Ollama health check waits up to 60s (HF cold-starts can be slow).
#   - Model pull retries up to 3 times, falls through a chain of lightweight
#     free-tier-friendly models, and is NEVER fatal: the app still boots so
#     the admin UI can show the setup wizard even when Ollama is offline.
#   - All paths under /tmp (HF Spaces only grants write access there).
# =============================================================================

# NOTE: no `set -e` at the top. We want the boot to continue even if the
# model pull fails — the admin UI can guide the user through a manual pull.
set -uo pipefail

echo ""
echo "  ┌──────────────────────────────────────┐"
echo "  │       🏠 HomePilot HF Space          │"
echo "  │    Private AI · Persistent Personas   │"
echo "  └──────────────────────────────────────┘"
echo ""

# ── Writable directories (HF only allows /tmp) ──────────
mkdir -p /tmp/ollama/models /tmp/homepilot/data /tmp/homepilot/uploads /tmp/homepilot/outputs
export OLLAMA_MODELS=/tmp/ollama/models
export HOME=/tmp

# ── Environment ──────────────────────────────────────────
export SQLITE_PATH=/tmp/homepilot/data/homepilot.db
export UPLOAD_DIR=/tmp/homepilot/uploads
export OUTPUT_DIR=/tmp/homepilot/outputs
export DEFAULT_PROVIDER=${DEFAULT_PROVIDER:-ollama}
export OLLAMA_BASE_URL=http://127.0.0.1:11434
# Primary model.  On HF Spaces (SPACE_ID is always set) default to the
# lighter qwen2.5:0.5b — CPU-basic runs it ~3-4x faster than the 1.5b
# variant, keeping first-token latency inside the SSE timeout budget.
# Outside HF (local dev, other hosts) the previous 1.5b default stands.
# Users who set OLLAMA_MODEL explicitly are always honored.
if [ -z "${OLLAMA_MODEL:-}" ] && [ -n "${SPACE_ID:-}" ]; then
    export OLLAMA_MODEL="qwen2.5:0.5b"
fi
export OLLAMA_MODEL=${OLLAMA_MODEL:-qwen2.5:1.5b}
# Comma-separated fallback chain tried if the primary fails to pull. Each is
# under ~1.5 GB on disk and runs comfortably in the HF free tier (16 GB RAM,
# no GPU). Order: strong-but-larger → smaller → tiny last-resort.
export OLLAMA_FALLBACK_MODELS=${OLLAMA_FALLBACK_MODELS:-qwen2.5:0.5b,llama3.2:1b,smollm2:360m}
export COMFY_BASE_URL=""
export MEDIA_BASE_URL=""
export AVATAR_SERVICE_URL=""
export CORS_ORIGINS="*"
export API_KEY=${API_KEY:-}

# ── 1. Start Ollama ─────────────────────────────────────
echo "[1/4] Starting Ollama..."
ollama serve &
OLLAMA_PID=$!

OLLAMA_READY=false
for i in $(seq 1 60); do
    if curl -sf http://127.0.0.1:11434/api/tags > /dev/null 2>&1; then
        echo "       ✓ Ollama ready (${i}s)"
        OLLAMA_READY=true
        break
    fi
    sleep 1
done

if [ "$OLLAMA_READY" != "true" ]; then
    echo "       ⚠ Ollama did not come up within 60s — continuing anyway"
    echo "         The UI setup wizard can retry the model pull after boot."
fi

# ── 2. Pull default model (with retries + fallback chain) ───
#
# We try the primary model up to 3 times. If all retries fail, we walk the
# OLLAMA_FALLBACK_MODELS chain. The app boots regardless — a missing model
# degrades gracefully to the setup wizard, which is vastly better UX than a
# container crash loop.
pull_with_retries () {
    local model="$1"
    local tries=3
    for attempt in $(seq 1 "$tries"); do
        echo "       ↓ Pulling ${model} (attempt ${attempt}/${tries})..."
        if ollama pull "$model" 2>&1 | tail -3; then
            echo "       ✓ ${model} pulled"
            return 0
        fi
        echo "       ✗ pull failed for ${model}"
        sleep $((attempt * 2))
    done
    return 1
}

MODEL_OK=false
if [ "$OLLAMA_READY" = "true" ]; then
    echo "[2/4] Checking model: ${OLLAMA_MODEL}..."
    MODEL_CHECK=$(curl -sf http://127.0.0.1:11434/api/tags 2>/dev/null || echo '{"models":[]}')
    if echo "$MODEL_CHECK" | grep -q "${OLLAMA_MODEL}"; then
        echo "       ✓ Model ${OLLAMA_MODEL} already available"
        MODEL_OK=true
    else
        # Try primary, then fallback chain.
        if pull_with_retries "$OLLAMA_MODEL"; then
            MODEL_OK=true
        else
            echo "       ↪ Trying fallback chain: ${OLLAMA_FALLBACK_MODELS}"
            IFS=',' read -ra FALLBACKS <<< "$OLLAMA_FALLBACK_MODELS"
            for fb in "${FALLBACKS[@]}"; do
                fb=$(echo "$fb" | xargs)  # trim whitespace
                [ -z "$fb" ] && continue
                if pull_with_retries "$fb"; then
                    export OLLAMA_MODEL="$fb"
                    MODEL_OK=true
                    echo "       ℹ Using fallback model: ${fb}"
                    break
                fi
            done
        fi
    fi

    if [ "$MODEL_OK" != "true" ]; then
        echo "       ⚠ No model could be pulled. App will boot without a default."
        echo "         Users can pull a model later via the Models page or:"
        echo "         curl -X POST http://127.0.0.1:11434/api/pull -d '{\"name\":\"qwen2.5:1.5b\"}'"
    fi
else
    echo "[2/4] Skipping model pull — Ollama not ready."
fi

# ── 2b. Pre-warm the Ollama runner ───────────────────────
# Fire one tiny throwaway inference so the model is already loaded in
# RAM when the first real user chat arrives.  Without this, the first
# chat pays a 2-3s cold-load cost on top of generation.  Non-blocking
# so it never delays app startup.
# Override:  OLLAMA_WARMUP=false
if [ "${MODEL_OK:-false}" = "true" ] && [ "${OLLAMA_WARMUP:-true}" = "true" ]; then
    echo "       · pre-warming runner (background)..."
    (
        curl -sSf -X POST http://127.0.0.1:11434/api/chat \
            -H "Content-Type: application/json" \
            -d "{\"model\":\"${OLLAMA_MODEL}\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}],\"stream\":false,\"options\":{\"num_predict\":1}}" \
            > /dev/null 2>&1 || true
    ) &
fi

# ── 3. Auto-import Chata personas ────────────────────────
echo "[3/4] Importing Chata personas..."
MARKER="/tmp/homepilot/data/.personas_imported"
CHATA_PERSONAS_DIR="${CHATA_PERSONAS_DIR:-/app/chata-personas}"
if [ -f "$MARKER" ]; then
    echo "       ✓ Already imported ($(cat "$MARKER"))"
else
    # Additional sources (colon/comma-separated) and an optional remote pack
    # are picked up by the importer from the environment:
    #   EXTRA_PERSONAS_DIRS=/app/extra-personas,/app/my-custom
    #   SHARED_PERSONAS_URL=https://example.com/packs/latest.zip
    EXTRA_ARGS=""
    if [ -d "/app/custom-personas" ]; then EXTRA_ARGS="$EXTRA_ARGS /app/custom-personas"; fi
    if [ -d "/app/shared-personas" ]; then EXTRA_ARGS="$EXTRA_ARGS /app/shared-personas"; fi
    if python3 /app/auto_import_personas.py "$CHATA_PERSONAS_DIR" /tmp/homepilot/data $EXTRA_ARGS; then
        echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$MARKER"
        echo "       ✓ Personas imported"
    else
        echo "       ⚠ Persona import reported an error — continuing"
    fi
fi

# ── 4. Start HomePilot ───────────────────────────────────
echo "[4/4] Starting HomePilot on :7860..."
echo ""
echo "  ┌──────────────────────────────────────┐"
echo "  │  Ready!                               │"
echo "  │                                       │"
echo "  │  App:      /                          │"
echo "  │  Health:   /health                    │"
echo "  │  API:      /docs                      │"
echo "  │  Gallery:  /community/registry        │"
echo "  │  Chat:     /v1/chat/completions       │"
echo "  └──────────────────────────────────────┘"
echo ""

# ── Chata: additive persona -> project bootstrap ──────────────────
# Imports every bundled .hpersona as a HomePilot Project so the UI
# is populated on first visit.  Optional — can be disabled via env:
#   ENABLE_PROJECT_BOOTSTRAP=false
# to ship a clean HomePilot without any pre-installed personas.
#
# Runs in the background AFTER the main server starts (so /health is
# reachable).  Output is tee'd to stdout so failures are visible in
# the HF Space run logs.  Idempotent — gated by a marker file.
if [ "${ENABLE_PROJECT_BOOTSTRAP:-true}" = "true" ] \
   && [ -f /app/chata_project_bootstrap.py ]; then
    (
        sleep 5
        for attempt in 1 2; do
            python3 /app/chata_project_bootstrap.py \
                --personas-dir "${CHATA_PERSONAS_DIR:-/app/chata-personas}" \
                --api-base http://127.0.0.1:7860 \
                --marker /tmp/homepilot/data/.projects_bootstrapped \
                $([ "$attempt" -gt 1 ] && echo "--force") \
                && break
            echo "[chata-bootstrap] retry in 30s..."
            sleep 30
        done
    ) 2>&1 | tee -a /tmp/homepilot/data/bootstrap.log &
fi

exec python3 /app/hf_wrapper.py