gemma4-e4b-ablitereted-server

Sleeping

AI Assistant

Squash history and optimize Hugging Face Space configuration

aac2d6f about 2 months ago

464 Bytes

	# Base image: the llama.cpp image that ships the server binary this file
	# launches from /app/llama-server in its ENTRYPOINT.
	# NOTE(review): `server` looks like a moving tag rather than a version; consider
	# pinning a versioned tag or an @sha256 digest for reproducible builds — confirm
	# which tags the upstream registry publishes.
	FROM ghcr.io/ggml-org/llama.cpp:server


	# Listen address and port. Both values are handed to llama-server through the
	# --host / --port flags in the ENTRYPOINT, and can be overridden at run time.
	ENV HOST=0.0.0.0 \
	    PORT=7860

	# Advertise the listening port; EXPOSE is documentation only and does not
	# publish the port by itself.
	EXPOSE ${PORT}

	# Start llama-server with every tunable taken from environment variables.
	# HOST and PORT get defaults from ENV in this file; the remaining variables
	# (HF_REPO, HF_FILE, MODEL_ALIAS, API_KEY, N_CTX, N_GL, THREADS, BATCH_SIZE,
	# UBATCH_SIZE, CACHE_TYPE_K, CACHE_TYPE_V, PARALLEL) are not defined in the
	# visible part of this file and are expected to be supplied at run time.
	#
	# Shell form is kept on purpose so /bin/sh expands the variables, with two
	# safeguards:
	#   * `exec` replaces the wrapper shell with llama-server, so the server itself
	#     receives the container stop signal and can shut down cleanly.
	#   * every expansion is double-quoted, so a value is never word-split or
	#     glob-expanded, and an empty/unset variable stays an (empty) argument
	#     instead of vanishing and letting the next flag be consumed as its value.
	#
	# NOTE(review): --api-key on the command line is visible in the process list;
	# llama-server reportedly also reads the key from the LLAMA_API_KEY environment
	# variable — confirm against the llama.cpp server docs before switching.
	ENTRYPOINT exec /app/llama-server \
	--hf-repo "$HF_REPO" \
	--hf-file "$HF_FILE" \
	--alias "$MODEL_ALIAS" \
	--api-key "$API_KEY" \
	-c "$N_CTX" \
	-ngl "$N_GL" \
	-t "$THREADS" \
	-b "$BATCH_SIZE" \
	-ub "$UBATCH_SIZE" \
	-fa on \
	--cache-type-k "$CACHE_TYPE_K" \
	--cache-type-v "$CACHE_TYPE_V" \
	--parallel "$PARALLEL" \
	--host "$HOST" \
	--port "$PORT"