#!/bin/bash set -e # Find llama-server binary LLAMA_BIN=$(find /usr /app /llama.cpp /usr/local / -maxdepth 6 -name "llama-server" -type f 2>/dev/null | head -1) if [ -z "$LLAMA_BIN" ]; then echo "ERROR: llama-server binary not found" exit 1 fi echo "Found llama-server at: $LLAMA_BIN" "$LLAMA_BIN" \ -m /app/gemma-4-E2B-it-UD-Q5_K_XL.gguf \ --mmproj /app/mmproj-BF16.gguf \ --host 127.0.0.1 \ --port 8080 \ -t 2 \ --cache-type-k q8_0 \ --cache-type-v iq4_nl \ -c 128000 \ -n 38912 & LLAMA_PID=$! echo "llama-server started (PID $LLAMA_PID)" # Wait up to 5 minutes for llama-server to be healthy echo "Waiting for llama-server to be ready..." for i in $(seq 1 150); do if curl -sf http://127.0.0.1:8080/health > /dev/null 2>&1; then echo "llama-server is ready" break fi if ! kill -0 "$LLAMA_PID" 2>/dev/null; then echo "ERROR: llama-server process died" exit 1 fi if [ "$i" -eq 150 ]; then echo "ERROR: llama-server did not become ready in time" exit 1 fi sleep 2 done exec uvicorn main:app --host 0.0.0.0 --port 7860