DarkMindForever committed on
Commit
cf3e414
·
verified ·
1 Parent(s): 418cbdb

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +5 -9
Dockerfile CHANGED
@@ -1,24 +1,20 @@
1
  FROM ghcr.io/ggml-org/llama.cpp:server
2
-
3
  USER root
4
  RUN apt-get update && apt-get install -y curl
5
-
6
- # Download Gemma-3n-E2B GGUF
7
  RUN mkdir -p /models && \
8
  curl -L https://huggingface.co/unsloth/gemma-3n-E2B-it-GGUF/resolve/main/gemma-3n-E2B-it-Q4_0.gguf -o /models/model.gguf && \
9
  chown -R 1000:1000 /models
10
-
11
  USER 1000
12
  ENV LLAMA_ARG_MODEL=/models/model.gguf
13
  ENV LLAMA_ARG_HOST=0.0.0.0
14
  ENV LLAMA_ARG_PORT=7860
15
- ENV LLAMA_ARG_CTX_SIZE=8192
 
 
 
 
16
  ENV LLAMA_ARG_NO_MMAP=false
17
  ENV LLAMA_ARG_MLOCK=true
18
- ENV LLAMA_ARG_BATCH_SIZE=512
19
- ENV LLAMA_ARG_UBATCH_SIZE=128
20
-
21
  HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \
22
  CMD curl -f http://localhost:7860/health || exit 1
23
-
24
  ENTRYPOINT ["/app/llama-server"]
 
# syntax=docker/dockerfile:1
# llama.cpp server image preloaded with the Gemma-3n-E2B-it Q4_0 GGUF model.
# NOTE(review): ":server" is a rolling tag — consider pinning by digest
# (ghcr.io/ggml-org/llama.cpp:server@sha256:...) for reproducible builds.
FROM ghcr.io/ggml-org/llama.cpp:server

USER root
# curl is needed both to download the model at build time and by HEALTHCHECK
# at runtime. Install without recommends and purge the apt lists in the same
# layer so the package index never persists in the image (DL3015/DL3009).
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Download the Gemma-3n-E2B GGUF. "-f" makes curl fail the build on an HTTP
# error instead of silently saving an HTML error page as the model file.
# chown to UID/GID 1000 so the non-root runtime user can mlock/read it.
RUN mkdir -p /models && \
    curl -fL https://huggingface.co/unsloth/gemma-3n-E2B-it-GGUF/resolve/main/gemma-3n-E2B-it-Q4_0.gguf -o /models/model.gguf && \
    chown -R 1000:1000 /models

# Drop root: the server runs as the unprivileged user from here on.
USER 1000

# llama-server reads its CLI options from LLAMA_ARG_* environment variables;
# grouped in one instruction since they form a single configuration unit.
ENV LLAMA_ARG_MODEL=/models/model.gguf \
    LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_ARG_PORT=7860 \
    LLAMA_ARG_THREADS=8 \
    LLAMA_ARG_BATCH_SIZE=2048 \
    LLAMA_ARG_UBATCH_SIZE=512 \
    LLAMA_ARG_CTX_SIZE=4096 \
    LLAMA_ARG_FLASH_ATTN=true \
    LLAMA_ARG_NO_MMAP=false \
    LLAMA_ARG_MLOCK=true

# Documentation only (does not publish the port) — 7860 matches LLAMA_ARG_PORT.
EXPOSE 7860

# Probe the server's own health endpoint; generous start-period covers model load.
HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Exec (JSON-array) form so llama-server is PID 1 and receives SIGTERM directly.
ENTRYPOINT ["/app/llama-server"]