# ContractIntel_AI Dockerfile
FROM python:3.10-slim-bullseye
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1
ENV DOCKER_CONTAINER=true
ENV SPACE_APP_DATA=/data
ENV HF_HOME=/data/huggingface
ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
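# NOTE: this path must match the filename pre-downloaded into /data/models below;
# the app reads LLAMA_CPP_MODEL_PATH at startup.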
# Optimize llama-cpp-python build for CPU only
ENV CMAKE_ARGS="-DLLAMA_BLAS=OFF -DLLAMA_CUBLAS=OFF"
ENV FORCE_CMAKE=1
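# NOTE: newer llama.cpp builds renamed these options (e.g. LLAMA_CUBLAS -> GGML_CUDA);
# pin the llama-cpp-python version in requirements.txt if relying on the old names.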
WORKDIR /app
# System deps - minimal for HuggingFace Spaces
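# Package notes:
#   build-essential: compiles llama-cpp-python from source (FORCE_CMAKE=1 above)
#   libglib2.0-0, libjpeg62-turbo: runtime libraries for image handling
#   poppler-utils: PDF text and image extraction tools
#   libmagic1: file type detection
#   curl: used by the HEALTHCHECK below
#   git: for pip installs from git URLs, if requirements.txt uses any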
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libglib2.0-0 \
        libjpeg62-turbo \
        poppler-utils \
        libmagic1 \
        curl \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first for better layer caching
COPY requirements.txt /app/requirements.txt
# Install pinned Python dependencies (PIP_NO_CACHE_DIR=1 already disables the pip cache)
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Download spaCy model (after dependencies)
RUN python -m spacy download en_core_web_sm
# Create directories the app expects
RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
# Pre-download the GGUF model at build time so startup never depends on network access
RUN python -c "from huggingface_hub import hf_hub_download; \
    import shutil; \
    downloaded = hf_hub_download( \
        repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
        filename='Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf', \
        cache_dir='/data/huggingface' \
    ); \
    shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf')" && \
    echo "Model downloaded to /data/models/"
# Copy app code
COPY . .
# Make app code and data directories world-readable and traversable
RUN chmod -R 755 /app && \
chmod -R 755 /data
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD curl -f http://localhost:7860/api/v1/health || exit 1
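# NOTE: if the model is loaded eagerly at startup, --start-period may need to be
# longer than 30s for the healthcheck to pass.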
EXPOSE 7860
# Two uvicorn workers; each worker is a separate process that loads its own copy
# of the model, so memory use scales with --workers
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "2"]