srivatsavdamaraju committed on
Commit
0737dd8
·
verified ·
1 Parent(s): 3c58045

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +9 -21
Dockerfile CHANGED
@@ -1,26 +1,14 @@
1
- FROM ubuntu:22.04
2
 
3
- # Install dependencies
4
- RUN apt-get update && apt-get install -y \
5
- git build-essential cmake curl wget python3 python3-pip \
6
- && apt-get clean
7
 
8
- # Install Python dependencies (optional, depending on what you use)
9
- RUN pip3 install flask
10
 
11
- # Clone Llama.cpp
12
- RUN git clone https://github.com/ggerganov/llama.cpp /app/llama.cpp
13
- WORKDIR /app/llama.cpp
14
 
15
- # Create build directory and build with CMake
16
- RUN mkdir build && cd build && cmake .. -DLLAMA_SERVER=ON && cmake --build . --config Release
17
 
18
- # Download a GGUF model (you can replace this with your own or upload via HF Datasets)
19
- RUN mkdir /models && \
20
- wget -O /models/llama-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf
21
-
22
- # Expose the server port
23
- EXPOSE 8080
24
-
25
- # Run the server
26
- CMD ["./build/bin/server", "-m", "/models/llama-model.gguf", "-c", "512", "--host", "0.0.0.0", "--port", "8080"]
 
# syntax=docker/dockerfile:1

FROM python:3.10-slim

WORKDIR /app

# Install llama-cpp-python with the OpenAI-compatible server extra first,
# so changing the model file below does not invalidate this cached layer.
# Quoted: unquoted [server] can be glob-expanded by the shell.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): version is unpinned in the original — pin (e.g. llama-cpp-python==0.2.x)
# once the intended version is confirmed, for reproducible builds.
RUN pip install --no-cache-dir "llama-cpp-python[server]"

# Copy the model file into the container (adjust if model is in a subfolder)
COPY SmolVLM-500M-Instruct.gguf /app/

# Run as an unprivileged user; stable numeric UID for runtimes that verify non-root.
RUN useradd --system --uid 10001 --no-create-home appuser
USER appuser

# Documentation only — publish with `docker run -p 8000:8000`.
EXPOSE 8000

# Run server with your GGUF model file.
# --host 0.0.0.0 is required: llama_cpp.server binds localhost by default,
# which is unreachable from outside the container.
CMD ["python", "-m", "llama_cpp.server", "--model", "/app/SmolVLM-500M-Instruct.gguf", "--host", "0.0.0.0", "--port", "8000"]