srivatsavdamaraju committed on
Commit
3a7f2f0
·
verified ·
1 Parent(s): 35e2fc2

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +40 -24
Dockerfile CHANGED
@@ -1,29 +1,45 @@
1
- FROM ubuntu:22.04
 
2
 
3
- # Install dependencies (minimal)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  RUN apt-get update && apt-get install -y \
 
 
 
 
 
5
  curl \
6
- && rm -rf /var/lib/apt/lists/*
 
 
7
 
8
- # Set working directory
9
- WORKDIR /app
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Copy everything into the container
12
- COPY . /app
13
-
14
- # Set env variable
15
- ENV LLAMA_CPP_PATH=/app/build
16
-
17
- # Expose a port if your llama-server does
18
- EXPOSE 7860
19
-
20
- # Run the server if it exists
21
- CMD ["/bin/bash", "-c", "\
22
- echo 'Listing $LLAMA_CPP_PATH/bin:' && \
23
- ls -l $LLAMA_CPP_PATH/bin && \
24
- if [ -x \"$LLAMA_CPP_PATH/bin/llama-server\" ]; then \
25
- echo OK && \
26
- $LLAMA_CPP_PATH/bin/llama-server -hf ggml-org/SmolVLM-500M-Instruct-GGUF; \
27
- else \
28
- echo 'llama-server not found'; exit 1; \
29
- fi"]
 
1
# syntax=docker/dockerfile:1
# Hugging Face Spaces Dockerfile: builds llama.cpp's llama-server from the
# copied source tree, bakes in the SmolVLM GGUF model, and serves it on :8080.
FROM ubuntu:20.04

# Build-time only — do not leak noninteractive frontend into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
ENV MODEL_URL=https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/SmolVLM-500M-Instruct-q4.gguf

# Install toolchain + runtime fetch tools first (as root) so this heavy layer
# is cached independently of application-source changes.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    g++ \
    git \
    nodejs \
    npm \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user with a fixed UID (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /app

# Copy project files including llama.cpp (copied AFTER the apt layer so source
# edits do not invalidate the dependency cache).
COPY --chown=user . /app

# Build the web UI (optional but standard).
# NOTE(review): `npm install` kept because a package-lock.json is not
# guaranteed in the tree; switch to `npm ci` once a lockfile is committed.
WORKDIR /app/llama.cpp/examples/server/webui
RUN npm install && npm run build

# Build llama-server (configure + compile are one atomic step).
WORKDIR /app/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF \
    && cmake --build build --config Release -j 8

# Download the model next to the server binary and hand it to the runtime user.
WORKDIR /app/llama.cpp/build/bin
RUN wget -nv -O model.gguf "$MODEL_URL" \
    && chown user:user model.gguf

# Drop privileges: the server must NOT run as root (Spaces requirement, and
# the original file created `user` but never switched back after USER root).
USER user

# Documentation of the service port the CMD below binds.
EXPOSE 8080

# Run the llama-server (exec form: proper PID 1 + SIGTERM handling).
CMD ["./llama-server", "--host", "0.0.0.0", "--port", "8080", "-c", "2048", "-m", "model.gguf", "--cache-type-k", "q8_0"]