PreethiCarmelBosco committed on
Commit
4ce919c
·
verified ·
1 Parent(s): e223620

using ollama

Browse files
Files changed (1) hide show
  1. Dockerfile +20 -28
Dockerfile CHANGED
@@ -1,34 +1,26 @@
1
- # Use a standard Python 3.12 image
2
- FROM python:3.12-slim
3
 
4
- WORKDIR /app
5
-
6
- # --- 1. Install build-essential and cmake ---
7
- # This is necessary for compiling the C++ code
8
- RUN apt-get update && apt-get install -y build-essential cmake
9
-
10
- # --- 2. Install Python Dependencies (with CPU-only build) ---
11
- # We set CMAKE_ARGS to disable CUDA, which makes the
12
- # build *much* faster and avoids the job timeout.
13
- ENV CMAKE_ARGS="-DLLAMA_CUDA=OFF"
14
- RUN pip install "llama-cpp-python[server]" huggingface_hub
15
 
16
- # --- 3. Model Download ---
17
- # This part is correct and remains the same.
18
  COPY download_model.py .
19
  ARG HF_TOKEN
20
  RUN --mount=type=secret,id=HF_TOKEN \
21
- python download_model.py
 
 
 
 
 
 
 
 
22
 
23
- # --- 4. Server Runtime ---
24
- # This part is also correct and remains the same.
25
- EXPOSE 8000
26
- CMD [ \
27
- "python", \
28
- "-m", "llama_cpp.server", \
29
- "--model", "prem-1B-SQL.Q8_0.gguf", \
30
- "--n_gpu_layers", "0", \
31
- "--port", "8000", \
32
- "--host", "0.0.0.0", \
33
- "--api_key_env_var", "API_KEY" \
34
- ]
 
1
+ # --- 1. Use the official Ollama pre-built image ---
2
+ FROM ollama/ollama
3
 
4
+ # --- 2. Install Python & dependencies for our download script ---
5
+ # The base image is Debian-based (Ubuntu), so we can use apt-get
6
+ # Install only the required packages (no recommends) and drop the apt lists
+ # in the same layer so they do not end up in the image.
+ RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip && rm -rf /var/lib/apt/lists/*
7
+ # --no-cache-dir keeps pip's download cache out of the image layer.
+ # PIP_BREAK_SYSTEM_PACKAGES=1 lets this system-wide install proceed on bases
+ # whose Python is marked externally managed (PEP 668); older pip ignores it.
+ RUN PIP_BREAK_SYSTEM_PACKAGES=1 pip install --no-cache-dir huggingface_hub
 
 
 
 
 
 
 
8
 
9
+ # --- 3. Download the GGUF model ---
10
+ WORKDIR /app
11
  COPY download_model.py .
12
  ARG HF_TOKEN
13
  RUN --mount=type=secret,id=HF_TOKEN \
14
+ python3 download_model.py
15
+
16
+ # --- 4. Create the Ollama "Modelfile" ---
17
+ # This file tells Ollama to use our downloaded GGUF
18
+ RUN echo "FROM /app/prem-1B-SQL.Q8_0.gguf" > /app/Modelfile
19
+
20
+ # --- 5. Import the model into Ollama's registry ---
21
+ # This makes the model available to serve
22
+ # "ollama create" is a client command that talks to a running Ollama server,
+ # and no server is running during "docker build". Start one in the background
+ # for this step only, wait (up to ~30s) until it answers, then import the model.
+ RUN ollama serve & \
+     for i in $(seq 1 30); do ollama list >/dev/null 2>&1 && break; sleep 1; done && \
+     ollama create prem-sql-api -f /app/Modelfile
23
 
24
+ # The base image's default command is "ollama serve",
25
+ # which will automatically start the API server on port 11434.
26
+ # It will also serve our newly created "prem-sql-api" model.