#!/usr/bin/env bash
# Entrypoint: start the llama.cpp inference server in the background, wait
# until it answers on its OpenAI-compatible API, then run the FastAPI
# front-end (uvicorn) in the foreground on port 7860.
set -euo pipefail

cd /llama.cpp/build

# Launch llama-server in the background and capture its PID so the readiness
# loop below can detect an early crash instead of polling forever.
./bin/llama-server \
  --host 0.0.0.0 \
  --port 8080 \
  --model /models/model.gguf \
  --ctx-size 32768 \
  --threads 2 &
llama_pid=$!

echo "Waiting for llama.cpp server..."
until curl -s "http://localhost:8080/v1/models" >/dev/null 2>&1; do
  # Bail out if the background server died before becoming ready.
  if ! kill -0 "$llama_pid" 2>/dev/null; then
    echo "llama-server exited before becoming ready" >&2
    exit 1
  fi
  sleep 1
done
echo "llama.cpp server is ready."

# Start FastAPI. 'exec' replaces the shell so uvicorn receives signals
# (e.g. SIGTERM from a container runtime) directly.
echo "Starting FastAPI server on port 7860..."
cd /
exec python3 -m uvicorn app:app --host 0.0.0.0 --port 7860