# Thin wrapper image around the upstream llama.cpp server image.
# It only fixes the listen address/port and builds the llama-server command
# line from environment variables supplied when the container is started.
#
# NOTE(review): `server` looks like a moving tag; consider pinning a specific
# tag or digest for reproducible builds.
FROM ghcr.io/ggml-org/llama.cpp:server

# Listen address and port handed to llama-server via --host / --port below.
# Both can be overridden at `docker run` time.
ENV HOST=0.0.0.0 \
    PORT=7860

# Documentation only: uses the build-time value of PORT (7860). Overriding
# PORT at runtime changes the listening port but not this metadata.
EXPOSE $PORT

# Variables expanded by the shell at container start. None of them is given a
# default in this file, so they must be provided by the runtime environment:
#   HF_REPO, HF_FILE, MODEL_ALIAS, API_KEY, N_CTX, N_GL, THREADS,
#   BATCH_SIZE, UBATCH_SIZE, CACHE_TYPE_K, CACHE_TYPE_V, PARALLEL
#
# Exec (JSON) form with an explicit `sh -c` keeps the $VAR expansion, and the
# leading `exec` replaces the shell with llama-server so the server itself
# receives the stop signal from `docker stop` instead of an intermediate shell.
# The trailing backslashes are Dockerfile line continuations; the joined text
# must remain a valid JSON array.
ENTRYPOINT ["/bin/sh", "-c", "exec /app/llama-server \
    --hf-repo $HF_REPO \
    --hf-file $HF_FILE \
    --alias \"$MODEL_ALIAS\" \
    --api-key \"$API_KEY\" \
    -c $N_CTX \
    -ngl $N_GL \
    -t $THREADS \
    -b $BATCH_SIZE \
    -ub $UBATCH_SIZE \
    -fa on \
    --cache-type-k $CACHE_TYPE_K \
    --cache-type-v $CACHE_TYPE_V \
    --parallel $PARALLEL \
    --host $HOST \
    --port $PORT"]