srivatsavdamaraju committed on
Commit
0737dd8
·
verified ·
1 Parent(s): 3c58045

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +9 -21
Dockerfile CHANGED
@@ -1,26 +1,14 @@
1
- FROM ubuntu:22.04
2
 
3
- # Install dependencies
4
- RUN apt-get update && apt-get install -y \
5
- git build-essential cmake curl wget python3 python3-pip \
6
- && apt-get clean
7
 
8
- # Install Python dependencies (optional, depending on what you use)
9
- RUN pip3 install flask
10
 
11
- # Clone Llama.cpp
12
- RUN git clone https://github.com/ggerganov/llama.cpp /app/llama.cpp
13
- WORKDIR /app/llama.cpp
14
 
15
- # Create build directory and build with CMake
16
- RUN mkdir build && cd build && cmake .. -DLLAMA_SERVER=ON && cmake --build . --config Release
17
 
18
- # Download a GGUF model (you can replace this with your own or upload via HF Datasets)
19
- RUN mkdir /models && \
20
- wget -O /models/llama-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf
21
-
22
- # Expose the server port
23
- EXPOSE 8080
24
-
25
- # Run the server
26
- CMD ["./build/bin/server", "-m", "/models/llama-model.gguf", "-c", "512", "--host", "0.0.0.0", "--port", "8080"]
 
# syntax=docker/dockerfile:1

FROM python:3.10-slim

WORKDIR /app

# Install llama-cpp-python with the OpenAI-compatible server extra first,
# so changing the model file below does not invalidate this cached layer.
# Quoted: unquoted [server] can be glob-expanded by the shell.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): version is unpinned in the original — pin (e.g. llama-cpp-python==0.2.x)
# once the intended version is confirmed, for reproducible builds.
RUN pip install --no-cache-dir "llama-cpp-python[server]"

# Copy the model file into the container (adjust if model is in a subfolder)
COPY SmolVLM-500M-Instruct.gguf /app/

# Run as an unprivileged user; stable numeric UID for runtimes that verify non-root.
RUN useradd --system --uid 10001 --no-create-home appuser
USER appuser

# Documentation only — publish with `docker run -p 8000:8000`.
EXPOSE 8000

# Run server with your GGUF model file.
# --host 0.0.0.0 is required: llama_cpp.server binds localhost by default,
# which is unreachable from outside the container.
CMD ["python", "-m", "llama_cpp.server", "--model", "/app/SmolVLM-500M-Instruct.gguf", "--host", "0.0.0.0", "--port", "8000"]