DarkMindForever committed on
Commit
a9cb600
·
verified ·
1 Parent(s): f6cab9d

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +29 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1
# llama.cpp server image pre-packaged with the Nanonets-OCR2 1.5B GGUF model
# (Q4_K_M quantization), served on port 7860.
# NOTE(review): the upstream :server tag is floating — pin by digest for
# reproducible builds once a known-good image is identified.
FROM ghcr.io/ggml-org/llama.cpp:server

USER root
# curl is needed both to fetch the model at build time and by HEALTHCHECK
# at runtime. Clean the apt lists in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
    && rm -rf /var/lib/apt/lists/*

# Download the model. -f makes curl fail on HTTP errors instead of saving an
# error page as the model file; -L follows Hugging Face's redirect to the CDN.
# chown in the same layer so the non-root runtime user can read the file.
RUN mkdir -p /models && \
    curl -fL https://huggingface.co/mradermacher/Nanonets-OCR2-1.5B-exp-GGUF/resolve/main/Nanonets-OCR2-1.5B-exp.Q4_K_M.gguf -o /models/model.gguf && \
    chown -R 1000:1000 /models

# Drop root before runtime.
USER 1000

# Server configuration — llama-server reads these LLAMA_ARG_* variables as
# its command-line defaults.
ENV LLAMA_ARG_MODEL=/models/model.gguf \
    LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_ARG_PORT=7860 \
    LLAMA_ARG_CTX_SIZE=8192 \
    LLAMA_ARG_THREADS=8 \
    LLAMA_ARG_CONT_BATCHING=true

# Performance tuning: logical batch size vs. physical micro-batch size.
ENV LLAMA_ARG_BATCH_SIZE=2048 \
    LLAMA_ARG_UBATCH_SIZE=512

# Documentation only — publish with `docker run -p 7860:7860`.
EXPOSE 7860

HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \
  CMD curl -fsS http://localhost:7860/health || exit 1

ENTRYPOINT ["/app/llama-server"]