Dmitry Beresnev committed
Commit 7f69342 · 1 Parent(s): cba98c9

fix dockerfile

Files changed (1)
  1. Dockerfile +34 -22
Dockerfile CHANGED
@@ -1,40 +1,52 @@
-FROM python:3.12-slim
+FROM debian:bookworm-slim AS builder
 
-# Install build dependencies for llama-cpp-python
+# Install build dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc \
-    g++ \
+    git \
+    build-essential \
     cmake \
+    ca-certificates \
     && rm -rf /var/lib/apt/lists/*
 
-RUN pip install --no-cache-dir --root-user-action=ignore uv
+# Clone and build llama.cpp
+WORKDIR /build
+RUN git clone https://github.com/ggerganov/llama.cpp.git && \
+    cd llama.cpp && \
+    cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_AVX2=OFF && \
+    cmake --build build --config Release --target llama-server -j$(nproc)
 
-WORKDIR /app
+# Runtime stage
+FROM debian:bookworm-slim
 
-COPY pyproject.toml /app/pyproject.toml
+# Install runtime dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
 
-# Install dependencies (llama-cpp-python will compile but with minimal features)
-#ENV CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DLLAMA_F16C=OFF"
-RUN uv pip install --system -r pyproject.toml
+# Copy llama-server binary from builder
+COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
 
-COPY . /app
-
-# RUN uv pip install --system --no-cache .
-
-# Create a non-root user
-# explicitly create the HF_HOME directory and give the user ownership
-# so they can save the downloaded model there.
+# Create non-root user
 RUN useradd -m -u 1000 user && \
-    mkdir -p /home/user/.cache/huggingface && \
+    mkdir -p /home/user/.cache/llama.cpp && \
     chown -R user:user /home/user
 
 USER user
+WORKDIR /home/user
 
+# Set environment variables
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    HF_HOME=/home/user/.cache/huggingface
+    LLAMA_CACHE=/home/user/.cache/llama.cpp \
+    PATH=/home/user/.local/bin:$PATH
 
 EXPOSE 7860
 
-# Start the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+# Start llama-server with HuggingFace model
+# Using TheBloke's DeepSeek Coder GGUF model
+CMD ["llama-server", \
+    "-hf", "TheBloke/deepseek-coder-6.7B-instruct-GGUF:deepseek-coder-6.7b-instruct.Q4_K_M.gguf", \
+    "--host", "0.0.0.0", \
+    "--port", "7860", \
+    "-c", "2048", \
+    "--metrics"]