gemme4 / start.sh
d3evil4's picture
feat: huh
e536cd5
#!/bin/bash
set -e
# Locate the llama-server binary.
# Roots are searched in the order given, so a match under an earlier root wins;
# "/" is last and acts as the catch-all. -print -quit makes find stop at the
# first hit instead of continuing to walk every remaining root.
# "|| true": find exits non-zero when a root (e.g. /llama.cpp) does not exist,
# which must not abort the script under `set -e`; an empty result is handled
# explicitly below.
LLAMA_BIN=$(find /usr /app /llama.cpp /usr/local / -maxdepth 6 \
  -name "llama-server" -type f -print -quit 2>/dev/null || true)
if [[ -z "$LLAMA_BIN" ]]; then
  echo "ERROR: llama-server binary not found" >&2
  exit 1
fi
echo "Found llama-server at: $LLAMA_BIN"

# Start llama-server in the background so this script can go on to poll it.
# The flags are passed through verbatim (see `llama-server --help` for their
# meaning); the server listens on 127.0.0.1:8080, the address the readiness
# probe later in this script talks to.
llama_args=(
  -m /app/gemma-4-E2B-it-UD-Q5_K_XL.gguf
  --mmproj /app/mmproj-BF16.gguf
  --host 127.0.0.1
  --port 8080
  -t 2
  --cache-type-k q8_0
  --cache-type-v iq4_nl
  -c 128000
  -n 38912
)
"$LLAMA_BIN" "${llama_args[@]}" &
# PID of the background server; used by the readiness loop to detect a crash.
LLAMA_PID=$!
echo "llama-server started (PID $LLAMA_PID)"
# Wait up to 5 minutes for llama-server to be healthy.
# Polls the /health endpoint every 2 seconds and stops as soon as it answers
# with a success status (curl -f treats HTTP errors as failure).
# Exits 1 if the server process disappears or the deadline passes.
echo "Waiting for llama-server to be ready..."
health_url="http://127.0.0.1:8080/health"
# SECONDS is bash's built-in elapsed-time counter; using it as the deadline
# keeps the total wait bounded even when individual probes are slow.
ready_deadline=$((SECONDS + 300))
while true; do
  # --max-time caps each probe so a server that accepts the connection but
  # never replies cannot stall the loop indefinitely.
  if curl -sf --max-time 5 "$health_url" > /dev/null 2>&1; then
    echo "llama-server is ready"
    break
  fi
  # kill -0 sends no signal; it only checks that the PID still exists.
  if ! kill -0 "$LLAMA_PID" 2>/dev/null; then
    echo "ERROR: llama-server process died" >&2
    exit 1
  fi
  if ((SECONDS >= ready_deadline)); then
    echo "ERROR: llama-server did not become ready in time" >&2
    # Do not leave the unready server running after giving up on it.
    kill "$LLAMA_PID" 2>/dev/null || true
    exit 1
  fi
  sleep 2
done
# Replace this shell with uvicorn serving main:app on 0.0.0.0:7860.
# exec means no wrapper shell stays behind: uvicorn takes over this process
# (and its PID), so signals sent to the script's PID reach uvicorn directly.
uvicorn_args=(main:app --host 0.0.0.0 --port 7860)
exec uvicorn "${uvicorn_args[@]}"