# Spaces: Running
# Container entrypoint: start llama-server in the background on loopback,
# wait until its /health endpoint answers, then hand the process over to
# uvicorn (main:app) on 0.0.0.0:7860.
#
# Exits 1 if the llama-server binary cannot be found, if the server process
# dies during startup, or if it does not become healthy within the polling
# budget below.

# -e: abort on unhandled failures; -u: abort on use of an unset variable.
# pipefail is intentionally NOT enabled: in the lookup pipeline below `find`
# may exit non-zero (unreadable directories, or the pipe being closed early
# by `head`), and that must not abort startup.
set -eu

# llama-server listens on loopback only; the same host/port pair is used for
# the server flags and for the health probe so the two cannot drift apart.
LLAMA_HOST=127.0.0.1
LLAMA_PORT=8080
HEALTH_URL="http://${LLAMA_HOST}:${LLAMA_PORT}/health"

# Polling budget: 150 attempts with a 2 s pause is ~5 minutes when probes
# return promptly. Each probe is capped so a hung connection cannot stall
# the loop indefinitely.
MAX_ATTEMPTS=150
POLL_INTERVAL=2
PROBE_TIMEOUT=2

# Print an error message to stderr and abort with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Find llama-server binary. Roots are searched in the order given and the
# first hit wins; `/` is last so it acts as a fallback. find's own stderr is
# discarded on purpose (permission-denied noise while walking the tree).
LLAMA_BIN=$(
  find /usr /app /llama.cpp /usr/local / -maxdepth 6 \
    -name "llama-server" -type f 2>/dev/null | head -1
)
if [ -z "$LLAMA_BIN" ]; then
  die "llama-server binary not found"
fi
echo "Found llama-server at: $LLAMA_BIN"

# Launch the model server in the background. Model paths and tuning flags
# are passed through unchanged from the original configuration.
"$LLAMA_BIN" \
  -m /app/gemma-4-E2B-it-UD-Q5_K_XL.gguf \
  --mmproj /app/mmproj-BF16.gguf \
  --host "$LLAMA_HOST" \
  --port "$LLAMA_PORT" \
  -t 2 \
  --cache-type-k q8_0 \
  --cache-type-v iq4_nl \
  -c 128000 \
  -n 38912 &
LLAMA_PID=$!
echo "llama-server started (PID $LLAMA_PID)"

# Wait (bounded) for llama-server to be healthy.
echo "Waiting for llama-server to be ready..."
attempt=1
while :; do
  # -f makes curl fail on HTTP error statuses, so only a successful
  # response from /health counts as ready.
  if curl -sf --max-time "$PROBE_TIMEOUT" "$HEALTH_URL" > /dev/null 2>&1; then
    echo "llama-server is ready"
    break
  fi

  # kill -0 sends no signal; it only tests whether the process still exists.
  if ! kill -0 "$LLAMA_PID" 2>/dev/null; then
    # Reap the child to surface its exit status; `|| status=$?` keeps
    # set -e from aborting before the message is printed.
    status=0
    wait "$LLAMA_PID" || status=$?
    printf 'llama-server exit status: %s\n' "$status" >&2
    die "llama-server process died"
  fi

  if [ "$attempt" -ge "$MAX_ATTEMPTS" ]; then
    # Do not leave a half-started server behind when giving up.
    kill "$LLAMA_PID" 2>/dev/null || true
    die "llama-server did not become ready in time"
  fi

  attempt=$((attempt + 1))
  sleep "$POLL_INTERVAL"
done

# Replace this shell with uvicorn so it receives signals directly;
# llama-server keeps running in the background.
exec uvicorn main:app --host 0.0.0.0 --port 7860