#!/bin/bash
# server1/start.sh
# Launch llama-server in the background so the script can poll it for readiness
cd /llama.cpp/build
./bin/llama-server \
  --host 0.0.0.0 \
  --port 8080 \
  --model /models/model.gguf \
  --ctx-size 32768 \
  --threads 2 &
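
# Optional GPU-offload variant (a sketch, commented out). --n-gpu-layers is a
# standard llama-server flag; the value 99 is an assumption meaning "offload
# all layers" and only helps if llama.cpp was built with GPU support.
#
# ./bin/llama-server \
#   --host 0.0.0.0 \
#   --port 8080 \
#   --model /models/model.gguf \
#   --ctx-size 32768 \
#   --n-gpu-layers 99 &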
echo "Waiting for llama.cpp server..."
until curl -s "http://localhost:8080/v1/models" >/dev/null 2>&1; do
sleep 1
done
echo "llama.cpp server is ready."
# Start FastAPI in the foreground; exec lets uvicorn receive container signals directly
echo "Starting FastAPI server on port 7860..."
cd /
exec python3 -m uvicorn app:app --host 0.0.0.0 --port 7860
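
# Quick smoke test once both servers are up (assumes default routes; adjust to
# your app):
#
# curl -s http://localhost:8080/v1/models    # llama.cpp OpenAI-compatible API
# curl -s http://localhost:7860/docs         # FastAPI interactive docs page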