#!/usr/bin/env bash
#
# Entrypoint: start llama-server for the main model (plus an optional second
# "planner" llama-server), wait for the main server's /health endpoint, then
# exec app.py.
#
# Environment read by this script:
#   USE_STUB_EXTRACTOR  "1" -> skip llama-server entirely and run app.py only
#   MODEL_HF_REPO       Hugging Face repo of the main model (required unless
#                       USE_STUB_EXTRACTOR=1)
#   MODEL_FILE          explicit file inside MODEL_HF_REPO (optional)
#   MODEL_QUANT         tag appended to "-hf repo:" when MODEL_FILE is empty
#                       (default Q4_K_M)
#   MMPROJ_FILE         mmproj file to fetch and pass via --mmproj (optional)
#   MMPROJ_REPO         repo holding MMPROJ_FILE (default: MODEL_HF_REPO)
#   PLANNER_HF_REPO,
#   PLANNER_FILE        when both are set, a second llama-server is started
#   PLANNER_PORT        planner port (default 8081)
#   PLANNER_NGL         planner -ngl value (default 999)
#   PLANNER_CTX         planner -c value (default 8192)
#   INFERENCE_BASE_URL  only echoed here; not otherwise used by this script

set -u

#######################################
# Download one file from a Hugging Face repo and print its local path.
# The repo and file name are passed as argv, not spliced into the Python
# source, so quotes or other special characters in them cannot break or
# inject code into the snippet.
# Arguments: $1 - repo id, $2 - file name inside the repo
# Outputs:   local path on stdout
# Returns:   python3's exit status (non-zero when the download fails)
#######################################
hf_download() {
  python3 -c '
import sys
from huggingface_hub import hf_hub_download
print(hf_hub_download(sys.argv[1], sys.argv[2]))
' "$1" "$2"
}

# Stub mode: no inference server at all, just the app.
if [[ "${USE_STUB_EXTRACTOR:-0}" == "1" ]]; then
  echo "[start] UI-only (USE_STUB_EXTRACTOR=1) — skipping llama-server"
  exec python3 app.py
fi

# Prefer llama-server from PATH; otherwise fall back to /app/llama-server.
LS="$(command -v llama-server || echo /app/llama-server)"

# Put the binary's own directory and /app on the library search path.
# Assigned separately from `export` so the command substitution's status is
# not masked by the export builtin.
ls_dir="$(dirname "$LS")"
export LD_LIBRARY_PATH="${ls_dir}:/app:${LD_LIBRARY_PATH:-}"
echo "[start] using llama-server at: $LS (LD_LIBRARY_PATH=$LD_LIBRARY_PATH)"

# Fail with a clear message instead of a bare "unbound variable" error.
: "${MODEL_HF_REPO:?MODEL_HF_REPO must be set (or use USE_STUB_EXTRACTOR=1)}"

# Model selection. Arguments are collected in arrays so a path containing
# whitespace stays a single argument when expanded.
model_args=()
if [[ -n "${MODEL_FILE:-}" ]]; then
  echo "[start] model: ${MODEL_HF_REPO}/${MODEL_FILE} (explicit file; downloads on first run)"
  if model_path="$(hf_download "$MODEL_HF_REPO" "$MODEL_FILE")" \
    && [[ -n "$model_path" ]]; then
    model_args=(-m "$model_path")
  else
    # Leave model_args empty: the server launch below is skipped rather than
    # started with a dangling "-m".
    echo "[start] ERROR: model download failed: ${MODEL_HF_REPO}/${MODEL_FILE}"
  fi
else
  echo "[start] model: ${MODEL_HF_REPO}:${MODEL_QUANT:-Q4_K_M} (downloads on first run)"
  model_args=(-hf "${MODEL_HF_REPO}:${MODEL_QUANT:-Q4_K_M}")
fi

# Optional mmproj. Best-effort by design: a failed download only drops the
# --mmproj flag, and the download's stderr is intentionally discarded.
mmproj_args=()
if [[ -n "${MMPROJ_FILE:-}" ]]; then
  mmproj_repo="${MMPROJ_REPO:-$MODEL_HF_REPO}"
  echo "[start] fetching mmproj ${mmproj_repo}/${MMPROJ_FILE} for vision..."
  mmproj_path="$(hf_download "$mmproj_repo" "$MMPROJ_FILE" 2>/dev/null)" \
    || mmproj_path=""
  if [[ -n "$mmproj_path" ]]; then
    mmproj_args=(--mmproj "$mmproj_path")
    echo "[start] mmproj ready: $mmproj_path"
  else
    echo "[start] mmproj download failed -> text-only"
  fi
fi

# Launch the main server in the background, bound to loopback port 8080.
# The ${arr[@]+...} form expands to nothing for an empty array without
# tripping `set -u` on Bash releases older than 4.4.
LLAMA_PID=""
if ((${#model_args[@]} > 0)); then
  "$LS" "${model_args[@]}" \
    --host 127.0.0.1 --port 8080 \
    -ngl 999 -c 8192 --jinja ${mmproj_args[@]+"${mmproj_args[@]}"} &
  LLAMA_PID=$!
fi

# Optional planner server; started only when both repo and file are given.
if [[ -n "${PLANNER_HF_REPO:-}" && -n "${PLANNER_FILE:-}" ]]; then
  planner_port="${PLANNER_PORT:-8081}"
  echo "[start] planner: ${PLANNER_HF_REPO}/${PLANNER_FILE} on :${planner_port}"
  if planner_path="$(hf_download "$PLANNER_HF_REPO" "$PLANNER_FILE")" \
    && [[ -n "$planner_path" ]]; then
    "$LS" -m "$planner_path" \
      --host 127.0.0.1 --port "$planner_port" \
      -ngl "${PLANNER_NGL:-999}" -c "${PLANNER_CTX:-8192}" --jinja &
    echo "[start] planner launching (PLANNER_BASE_URL should be http://127.0.0.1:${planner_port}/v1)"
  else
    echo "[start] ERROR: planner download failed -> planner not started"
  fi
fi

# Poll the main server's health endpoint: up to 900 probes, 2 s apart.
# Whatever the outcome, the app is still launched afterwards.
if [[ -n "$LLAMA_PID" ]]; then
  echo "[start] waiting for llama-server health (model download can take minutes)..."
  SECONDS=0 # Bash's built-in elapsed-seconds counter
  health="timeout"
  for ((i = 1; i <= 900; i++)); do
    # kill -0 sends no signal; it only tests that the process still exists.
    if ! kill -0 "$LLAMA_PID" 2>/dev/null; then
      echo "[start] ERROR: llama-server exited early"
      health="dead"
      break
    fi
    if curl -sf http://127.0.0.1:8080/health >/dev/null 2>&1; then
      echo "[start] llama-server ready after ~${SECONDS}s"
      health="ready"
      break
    fi
    sleep 2
  done
  if [[ "$health" == "timeout" ]]; then
    echo "[start] WARNING: llama-server still not healthy after ~${SECONDS}s; continuing"
  fi
else
  echo "[start] llama-server not started; skipping health wait"
fi

# INFERENCE_BASE_URL is only reported here; default it to empty so an unset
# value cannot abort the script under `set -u` right before the app starts.
echo "[start] launching app (UI + /agent) -> INFERENCE_BASE_URL=${INFERENCE_BASE_URL:-}"
exec python3 app.py
|
|