# llama-server / Dockerfile
# Hugging Face Space — last updated by subhrajit-mohanty (commit 58c9b59, verified)
# Start with Ubuntu 22.04 as base
# NOTE(review): consider pinning by digest (ubuntu:22.04@sha256:...) for fully
# reproducible builds.
FROM ubuntu:22.04
# Silence apt prompts during the image build only.
# ARG (not ENV) so DEBIAN_FRONTEND does not leak into the runtime environment
# of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
# Toolchain (build-essential, cmake, ninja-build, git, pkg-config) and BLAS
# headers (libopenblas-dev) are needed to compile llama.cpp; python3/pip for
# the huggingface_hub CLI installed later.
# --no-install-recommends keeps the layer small; the apt list cleanup happens
# in the same layer so the cache never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libopenblas-dev \
    ninja-build \
    pkg-config \
    python3 \
    python3-pip \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Clone and build llama.cpp.
#  - --depth 1 keeps the clone small. NOTE(review): consider pinning a release
#    tag (git clone --branch bNNNN) for reproducible builds.
#  - Upstream renamed the HTTP server binary from "server" to "llama-server";
#    a "server" symlink is kept so the existing start script still works.
#  - BUILD_SHARED_LIBS=OFF makes the copied binary self-contained (no .so
#    files left behind in the build tree to resolve at runtime).
#  - GGML_BLAS uses the libopenblas-dev package installed above, which was
#    otherwise unused; -G Ninja uses the installed ninja-build.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git && \
    cmake -S llama.cpp -B llama.cpp/build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build llama.cpp/build --config Release && \
    # Install the server binary to a location in PATH
    cp llama.cpp/build/bin/llama-server /usr/local/bin/ && \
    ln -sf /usr/local/bin/llama-server /usr/local/bin/server && \
    # Make sure the examples directory is available
    cp -r llama.cpp/examples /usr/local/share/llama.cpp-examples
# Install Python dependencies
# huggingface_hub provides the `huggingface-cli download` command used by
# /app/start.sh at container start.
# NOTE(review): unpinned install (hadolint DL3013) — consider pinning a
# version for reproducible builds.
RUN pip3 install --no-cache-dir huggingface_hub
# Create a directory for model cache
# Pre-create the Hugging Face hub cache path used by huggingface-cli when the
# container downloads the model at startup.
RUN mkdir -p /root/.cache/huggingface/hub
# Expose the Hugging Face Spaces port
# EXPOSE is documentation only (it does not publish the port); 7860 is the
# port the start script binds the server to.
EXPOSE 7860
# Create a script to download the model at container start (keeps the image
# small) and run the server.
#  - printf '%s\n' writes each argument verbatim on its own line — more
#    portable than relying on echo's backslash-escape handling.
#  - `huggingface-cli download --local-dir=/models` places the repo files
#    directly in /models (no per-repo subdirectory), so the model file is
#    located with find rather than a hard-coded path; mmproj projector files
#    are skipped.
#  - `exec` replaces the shell so the server runs as PID 1 and receives
#    SIGTERM from `docker stop`.
RUN printf '%s\n' \
      '#!/bin/bash' \
      'set -euo pipefail' \
      'echo "Downloading model from Hugging Face..."' \
      'mkdir -p /models' \
      'huggingface-cli download ggml-org/SmolVLM-500M-Instruct-GGUF --local-dir=/models' \
      'MODEL="$(find /models -name "*.gguf" ! -iname "mmproj*" | head -n 1)"' \
      'echo "Starting server..."' \
      'exec server -m "$MODEL" --host 0.0.0.0 --port 7860' \
      > /app/start.sh && \
    chmod +x /app/start.sh
# Set the startup command
# Exec-form CMD (no implicit /bin/sh -c wrapper); /app/start.sh downloads the
# model and launches the server.
CMD ["/app/start.sh"]