llama_cpp_server / Dockerfile
srivatsavdamaraju's picture
Update Dockerfile
8d7d685 verified
raw
history blame contribute delete
375 Bytes
FROM python:3.10-slim

WORKDIR /app

# Install llama-cpp-python with its server extra.
# The package builds llama.cpp from source during `pip install`, so a C/C++
# toolchain must be available; the slim base image ships without one.
# The toolchain is purged in the same layer so it does not bloat the image.
# libgomp1 is requested explicitly so the OpenMP runtime the compiled library
# may link against is not swept away by --auto-remove.
# The requirement is quoted so the shell never treats "[server]" as a glob.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libgomp1 \
    && pip install --no-cache-dir "llama-cpp-python[server]" \
    && apt-get purge -y --auto-remove build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged account so the server does not run as root.
RUN useradd --create-home --uid 1000 app

# Copy the exact model file into the container.
# Done after the dependency install so that swapping the model does not
# invalidate the (slow) build layer above.
COPY --chown=app:app SmolVLM-500M-Instruct-Q8_0.gguf /app/

USER app

EXPOSE 8000

# Run the server with the specific model file.
# --host 0.0.0.0 is required inside a container: the server binds to
# localhost by default, which is unreachable through the published port.
# The model path is absolute so it does not depend on the working directory.
CMD ["python", "-m", "llama_cpp.server", "--model", "/app/SmolVLM-500M-Instruct-Q8_0.gguf", "--host", "0.0.0.0", "--port", "8000"]