# Thin wrapper around the llama.cpp server image: every tunable is read from
# environment variables when the container starts, so one image can serve
# different models and settings.

# NOTE(review): ":server" is a floating tag; pin a release tag or an image
# digest if reproducible builds matter.
FROM ghcr.io/ggml-org/llama.cpp:server

# Listen address and port. Both can be overridden at `docker run` time.
ENV HOST=0.0.0.0 \
    PORT=7860

# Documentation only: records the default port; it does not publish it.
EXPOSE ${PORT}

# Probe the port the server is actually configured to listen on. Shell form is
# used on purpose so that $PORT is expanded inside the running container.
# NOTE(review): assumes the base image ships `curl` and that llama-server
# answers on /health without an API key -- confirm against the base image.
HEALTHCHECK --interval=30s --timeout=5s --start-period=5m --retries=3 \
    CMD curl -fsS "http://localhost:${PORT}/health" || exit 1

# The following variables have no default in this file and are expected to be
# supplied by the runtime environment:
#   HF_REPO, HF_FILE, MODEL_ALIAS, API_KEY, N_CTX, N_GL, THREADS, BATCH_SIZE,
#   UBATCH_SIZE, CACHE_TYPE_K, CACHE_TYPE_V, PARALLEL
#
# Exec (JSON) form with an explicit `sh -c` keeps runtime variable expansion,
# while the leading `exec` replaces the shell with llama-server so the server
# runs as PID 1 and receives the stop signal from `docker stop` directly.
# Every expansion is quoted so an empty or unset variable stays in its own
# argument slot instead of vanishing and letting the next flag be consumed as
# its value.
ENTRYPOINT ["/bin/sh", "-c", "exec /app/llama-server \
    --hf-repo \"$HF_REPO\" \
    --hf-file \"$HF_FILE\" \
    --alias \"$MODEL_ALIAS\" \
    --api-key \"$API_KEY\" \
    -c \"$N_CTX\" \
    -ngl \"$N_GL\" \
    -t \"$THREADS\" \
    -b \"$BATCH_SIZE\" \
    -ub \"$UBATCH_SIZE\" \
    -fa on \
    --cache-type-k \"$CACHE_TYPE_K\" \
    --cache-type-v \"$CACHE_TYPE_V\" \
    --parallel \"$PARALLEL\" \
    --host \"$HOST\" \
    --port \"$PORT\""]