PreethiCarmelBosco committed on
Commit
733c3ae
·
verified ·
1 Parent(s): c7007a6
Files changed (1) hide show
  1. Dockerfile +16 -18
Dockerfile CHANGED
@@ -1,30 +1,28 @@
1
- # Use a pre-built image that already has llama-cpp-python compiled
2
- FROM ghcr.io/abetlen/llama-cpp-python:latest
3
 
4
  WORKDIR /app
5
 
6
- # Copy the download script into the container
7
- COPY download_model.py .
 
8
 
9
- # Make the HF_TOKEN secret available as an argument
10
- ARG HF_TOKEN
 
 
 
11
 
12
- # --- FIX: Use a temporary virtual env to install dependencies ---
13
- # This creates a venv, installs huggingface_hub inside it,
14
- # runs the download script, and then this venv is discarded.
15
- # This prevents our pip install from breaking the base image.
16
  RUN --mount=type=secret,id=HF_TOKEN \
17
- python -m venv /tmp/downloader-venv && \
18
- . /tmp/downloader-venv/bin/activate && \
19
- pip install huggingface_hub && \
20
  python download_model.py
21
 
22
- # --- Server Runtime ---
23
- # Expose port 8000 (which we defined in README.md)
24
  EXPOSE 8000
25
-
26
- # This command runs in the base image's original environment
27
- # which should be stable and correctly linked.
28
  CMD [ \
29
  "python", \
30
  "-m", "llama_cpp.server", \
 
# syntax=docker/dockerfile:1
# The syntax directive pins the BuildKit frontend, which is required for
# the `RUN --mount=type=secret` used below.

# Use a standard Python 3.12 image
FROM python:3.12-slim

WORKDIR /app

# --- 1. Build toolchain ---
# build-essential and cmake are needed to compile llama-cpp-python's C++
# extension from source. --no-install-recommends keeps the layer minimal,
# and the apt lists are removed in the SAME layer so they never persist
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      cmake \
    && rm -rf /var/lib/apt/lists/*

# --- 2. Python dependencies (CPU-only build) ---
# CMAKE_ARGS disables the CUDA backend, which makes the compile *much*
# faster and avoids the build-job timeout. The flag is passed inline on
# the RUN (not via ENV) so this build-only setting does not leak into the
# container's runtime environment. --no-cache-dir keeps pip's wheel cache
# out of the layer.
# NOTE(review): newer llama.cpp builds renamed the switch to GGML_CUDA
# (-DGGML_CUDA=off); -DLLAMA_CUDA=OFF matches the version this image was
# built against — confirm if llama-cpp-python is upgraded.
RUN CMAKE_ARGS="-DLLAMA_CUDA=OFF" \
    pip install --no-cache-dir "llama-cpp-python[server]" huggingface_hub

# --- 3. Model download ---
# The HF token is supplied as a BuildKit secret mount, so it is never
# written into any image layer or visible in `docker history`.
# NOTE(review): ARG HF_TOKEN is not consumed by anything here (the secret
# mount supersedes it); retained only so existing `--build-arg HF_TOKEN=…`
# invocations keep working. Never pass the real token through it.
COPY download_model.py .
ARG HF_TOKEN
RUN --mount=type=secret,id=HF_TOKEN \
    python download_model.py

# --- 4. Server runtime ---
# EXPOSE is documentation only (it does not publish the port); the
# llama_cpp server started by CMD listens on 8000.
EXPOSE 8000
 
 
 
26
  CMD [ \
27
  "python", \
28
  "-m", "llama_cpp.server", \