# llama-server / Dockerfile
# Hugging Face Space — last updated by subhrajit-mohanty (commit 58c9b59, verified)
# Start with Ubuntu 22.04 as base
# NOTE(review): consider pinning by digest (ubuntu:22.04@sha256:...) for fully
# reproducible builds.
FROM ubuntu:22.04
# Silence apt prompts during the image build only.
# ARG (not ENV) so DEBIAN_FRONTEND does not leak into the runtime environment
# of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
# Toolchain (build-essential, cmake, ninja-build, git, pkg-config) and BLAS
# headers (libopenblas-dev) are needed to compile llama.cpp; python3/pip for
# the huggingface_hub CLI installed later.
# --no-install-recommends keeps the layer small; the apt list cleanup happens
# in the same layer so the cache never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libopenblas-dev \
    ninja-build \
    pkg-config \
    python3 \
    python3-pip \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Clone and build llama.cpp.
#  - --depth 1 keeps the clone small. NOTE(review): consider pinning a release
#    tag (git clone --branch bNNNN) for reproducible builds.
#  - Upstream renamed the HTTP server binary from "server" to "llama-server";
#    a "server" symlink is kept so the existing start script still works.
#  - BUILD_SHARED_LIBS=OFF makes the copied binary self-contained (no .so
#    files left behind in the build tree to resolve at runtime).
#  - GGML_BLAS uses the libopenblas-dev package installed above, which was
#    otherwise unused; -G Ninja uses the installed ninja-build.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git && \
    cmake -S llama.cpp -B llama.cpp/build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build llama.cpp/build --config Release && \
    # Install the server binary to a location in PATH
    cp llama.cpp/build/bin/llama-server /usr/local/bin/ && \
    ln -sf /usr/local/bin/llama-server /usr/local/bin/server && \
    # Make sure the examples directory is available
    cp -r llama.cpp/examples /usr/local/share/llama.cpp-examples
# Install Python dependencies
# huggingface_hub provides the `huggingface-cli download` command used by
# /app/start.sh at container start.
# NOTE(review): unpinned install (hadolint DL3013) — consider pinning a
# version for reproducible builds.
RUN pip3 install --no-cache-dir huggingface_hub
# Create a directory for model cache
# Pre-create the Hugging Face hub cache path used by huggingface-cli when the
# container downloads the model at startup.
RUN mkdir -p /root/.cache/huggingface/hub
# Expose the Hugging Face Spaces port
# EXPOSE is documentation only (it does not publish the port); 7860 is the
# port the start script binds the server to.
EXPOSE 7860
# Create a script to download the model at container start (keeps the image
# small) and run the server.
#  - printf '%s\n' writes each argument verbatim on its own line — more
#    portable than relying on echo's backslash-escape handling.
#  - `huggingface-cli download --local-dir=/models` places the repo files
#    directly in /models (no per-repo subdirectory), so the model file is
#    located with find rather than a hard-coded path; mmproj projector files
#    are skipped.
#  - `exec` replaces the shell so the server runs as PID 1 and receives
#    SIGTERM from `docker stop`.
RUN printf '%s\n' \
      '#!/bin/bash' \
      'set -euo pipefail' \
      'echo "Downloading model from Hugging Face..."' \
      'mkdir -p /models' \
      'huggingface-cli download ggml-org/SmolVLM-500M-Instruct-GGUF --local-dir=/models' \
      'MODEL="$(find /models -name "*.gguf" ! -iname "mmproj*" | head -n 1)"' \
      'echo "Starting server..."' \
      'exec server -m "$MODEL" --host 0.0.0.0 --port 7860' \
      > /app/start.sh && \
    chmod +x /app/start.sh
# Set the startup command
# Exec-form CMD (no implicit /bin/sh -c wrapper); /app/start.sh downloads the
# model and launches the server.
CMD ["/app/start.sh"]