# deepseek-coder-6b-api / Dockerfile
# Source: truegleai's Hugging Face Space — "Deploy FastAPI server with CodeLlama 7B"
# (commit 1e3c6e8, verified)
# Dockerfile for HuggingFace Spaces - Build llama-cpp-python from source
FROM python:3.11-slim-bookworm

# Set working directory (created automatically if missing)
WORKDIR /app

# Install the build toolchain needed to compile llama-cpp-python from source.
# libopenblas-dev + pkg-config are required because the build below passes
# -DGGML_BLAS_VENDOR=OpenBLAS; without the dev package CMake cannot find BLAS.
# --no-install-recommends keeps the layer small; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libopenblas-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Copy the dependency manifest on its own so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install every Python dependency EXCEPT llama-cpp-python, which is compiled
# from source in the next step. `sed '/…/d'` is used instead of `grep -v`
# because grep exits non-zero when no lines survive the filter (e.g. a
# requirements file containing only llama-cpp-python), which would abort the
# build; sed always exits 0 and pip accepts an empty requirements file.
RUN pip install --no-cache-dir --upgrade pip && \
    sed '/llama-cpp-python/d' requirements.txt > /tmp/requirements_filtered.txt && \
    pip install --no-cache-dir -r /tmp/requirements_filtered.txt && \
    rm /tmp/requirements_filtered.txt
# Build llama-cpp-python from source with CMAKE (this is the key!)
# Force CMAKE build to ensure it compiles against glibc, not using musl wheels
# FORCE_CMAKE=1 makes pip skip any prebuilt wheel and invoke the CMake build;
# GGML_BLAS / GGML_BLAS_VENDOR configure the ggml backend to link OpenBLAS.
# Version is pinned (0.2.90) for reproducible builds.
# NOTE(review): this configuration needs the OpenBLAS dev headers present in
# the image (libopenblas-dev) — confirm they are installed by the apt step.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
FORCE_CMAKE=1 \
pip install --no-cache-dir --force-reinstall --upgrade --verbose \
llama-cpp-python==0.2.90
# Copy application code last — it changes most often, so earlier dependency
# layers stay cached across rebuilds.
COPY app.py .

# Run as a non-root user. Hugging Face Spaces runs containers as UID 1000,
# and an image without USER runs as root — a flagged anti-pattern.
RUN useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app
USER appuser
ENV HOME=/home/appuser

# Expose port (HF Spaces serves on 7860; EXPOSE is documentation only)
EXPOSE 7860

# Health check against the app's own /health endpoint.
# -f fails on HTTP errors; -sS suppresses the progress bar but still
# surfaces real curl errors. Long start-period covers model load time.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run the FastAPI app (exec form: python is PID 1 and receives SIGTERM)
CMD ["python", "app.py"]