#!/usr/bin/env bash
# Entrypoint: start the llama.cpp inference server in the background, wait
# until its HTTP API answers, then run the FastAPI app in the foreground.
set -euo pipefail

cd /llama.cpp/build
./bin/llama-server \
  --host 0.0.0.0 \
  --port 8080 \
  --model /models/model.gguf \
  --ctx-size 32768 \
  --threads 2 &
llama_pid=$!

echo "Waiting for llama.cpp server..."
until curl -s "http://localhost:8080/v1/models" >/dev/null 2>&1; do
  # If llama-server died during startup, fail fast instead of polling forever.
  if ! kill -0 "$llama_pid" 2>/dev/null; then
    echo "llama-server exited before becoming ready" >&2
    exit 1
  fi
  sleep 1
done
echo "llama.cpp server is ready."

# Start FastAPI
echo "Starting FastAPI server on port 7860..."
cd /
# exec replaces this shell so uvicorn receives container signals (SIGTERM)
# directly, enabling graceful shutdown.
exec python3 -m uvicorn app:app --host 0.0.0.0 --port 7860