DarkMindForever committed on
Commit
da36f10
·
verified ·
1 Parent(s): 7bd5dc1

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +24 -6
Dockerfile CHANGED
@@ -1,23 +1,41 @@
1
  FROM ghcr.io/ggml-org/llama.cpp:server
2
 
3
  USER root
4
- RUN apt-get update && apt-get install -y python3 python3-pip curl && rm -rf /var/lib/apt/lists/*
5
- RUN pip3 install --no-cache-dir fastapi uvicorn duckduckgo-search requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  RUN mkdir -p /models && \
7
  curl -L https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf -o /models/model.gguf && \
8
  chown -R 1000:1000 /models
9
 
 
10
  COPY --chown=1000:1000 . .
11
 
 
 
 
12
  USER 1000
13
 
14
- # High-concurrency settings
15
  ENV LLAMA_ARG_MODEL=/models/model.gguf
16
  ENV LLAMA_ARG_HOST=127.0.0.1
17
  ENV LLAMA_ARG_PORT=8080
18
  ENV LLAMA_ARG_THREADS=8
19
- ENV LLAMA_ARG_CTX_SIZE=2048
20
- ENV LLAMA_ARG_BATCH_SIZE=512
21
 
22
- # Run our Python orchestrator
23
  ENTRYPOINT ["python3", "app.py"]
 
FROM ghcr.io/ggml-org/llama.cpp:server

USER root

# Install Python and core build tools.
# --no-install-recommends keeps the layer small; the apt list cache is
# removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Install the libraries the orchestrator needs into the system interpreter.
# --break-system-packages is required on PEP 668 "externally managed"
# distros (Debian 12+ / Ubuntu 23.04+).
# NOTE(review): versions are unpinned — pin them for reproducible builds.
RUN pip3 install --no-cache-dir --break-system-packages \
        duckduckgo-search \
        fastapi \
        python-multipart \
        requests \
        uvicorn

# Download the Gemma-3-4B-it GGUF weights.
# -f makes curl fail on HTTP errors instead of saving an HTML error page
# as /models/model.gguf; --retry absorbs transient network hiccups.
# /models is chown'd so the non-root runtime user (1000) can read it.
RUN mkdir -p /models && \
    curl -fL --retry 3 \
        https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_M.gguf \
        -o /models/model.gguf && \
    chown -R 1000:1000 /models

WORKDIR /app
COPY --chown=1000:1000 . .

# Make dist-packages importable regardless of sys.path quirks.
# NOTE(review): the python3.10 path is hardcoded and silently breaks if the
# base image ships a different Python version — confirm against the base
# image, or drop this line: pip3 already installs onto the default sys.path.
ENV PYTHONPATH=/usr/local/lib/python3.10/dist-packages:/usr/lib/python3/dist-packages

# Drop root before the runtime entrypoint; /models is owned by 1000 above.
USER 1000

# High-concurrency settings (read by the embedded llama.cpp server).
# Loopback host: only the Python orchestrator talks to the model server.
ENV LLAMA_ARG_MODEL=/models/model.gguf \
    LLAMA_ARG_HOST=127.0.0.1 \
    LLAMA_ARG_PORT=8080 \
    LLAMA_ARG_THREADS=8 \
    LLAMA_ARG_CTX_SIZE=1024

# Run the Python orchestrator. Exec form: python is PID 1 and receives
# SIGTERM directly on `docker stop`.
ENTRYPOINT ["python3", "app.py"]