Spaces:
Sleeping
Sleeping
| # Base image: slim Debian bullseye build of Python 3.10 | |
| FROM python:3.10-slim-bullseye | |
| # Python/pip runtime switches plus the data locations exported to the application | |
| ENV PYTHONUNBUFFERED=1 \ | |
| PIP_NO_CACHE_DIR=1 \ | |
| DOCKER_CONTAINER=true \ | |
| SPACE_APP_DATA=/data \ | |
| HF_HOME=/data/huggingface \ | |
| LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | |
| # llama-cpp-python build settings: BLAS and cuBLAS switched off, i.e. a plain CPU build | |
| ENV FORCE_CMAKE=1 \ | |
| CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0" | |
| # All following relative paths (COPY, RUN, CMD) resolve against /app | |
| WORKDIR /app | |
| # OS-level packages, kept to a minimum for HuggingFace Spaces. | |
| # Recommended extras are skipped and the apt package lists are dropped in the same layer. | |
| RUN apt-get update \ | |
| && apt-get install --yes --no-install-recommends \ | |
| build-essential \ | |
| curl \ | |
| git \ | |
| libglib2.0-0 \ | |
| libjpeg62-turbo \ | |
| libmagic1 \ | |
| poppler-utils \ | |
| && rm -rf /var/lib/apt/lists/* \ | |
| && apt-get clean | |
| # requirements.txt is copied on its own so this layer is only rebuilt when the file changes | |
| COPY requirements.txt ./ | |
| # Bring pip up to date, then install everything listed in requirements.txt | |
| RUN pip install --upgrade pip \ | |
| && pip install --no-cache-dir -r requirements.txt | |
| # Fetch the en_core_web_sm spaCy model; runs after the dependency install step | |
| RUN ["python", "-m", "spacy", "download", "en_core_web_sm"] | |
| # Pre-create the /data directory tree the application expects to find | |
| RUN mkdir -p \ | |
| /data/models \ | |
| /data/uploads \ | |
| /data/cache \ | |
| /data/logs \ | |
| /data/huggingface | |
| # Download the GGUF model during build and place it at the path LLAMA_CPP_MODEL_PATH points to. | |
| # The hub download goes into a throwaway cache that is deleted in this same RUN step: | |
| # keeping the cache next to the copied file would store the multi-GB model twice in the image. | |
| # shutil.copy follows the cache's snapshot symlink, so a real file lands in /data/models. | |
| RUN mkdir -p /data/models && \ | |
| python -c "from huggingface_hub import hf_hub_download; \ | |
| import shutil; \ | |
| downloaded = hf_hub_download( \ | |
| repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \ | |
| filename='Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf', \ | |
| cache_dir='/tmp/hf-build-cache' \ | |
| ); \ | |
| shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf')" && \ | |
| rm -rf /tmp/hf-build-cache && \ | |
| echo "Model downloaded to /data/models/" | |
| # Copy app code last so source edits do not invalidate the dependency and model layers | |
| COPY . . | |
| # Set permissions. | |
| # Under /data only directories are touched: changing the mode of the multi-GB model file | |
| # from an earlier layer would make the overlay filesystem copy the whole file into this layer. | |
| # NOTE(review): Hugging Face Docker Spaces are documented to run as UID 1000; root-owned 755 | |
| # directories are not writable by that user. Confirm whether /data/uploads, /data/cache, | |
| # /data/logs and /data/huggingface must be writable at runtime. | |
| RUN chmod -R 755 /app && \ | |
| find /data -type d -exec chmod 755 {} + | |
| # Port served by the application | |
| EXPOSE 7860 | |
| # Liveness probe: request the health endpoint every 30s after a 30s start-up grace period; | |
| # a probe fails on an HTTP error or after 10s, and 3 consecutive failures mark the container unhealthy | |
| HEALTHCHECK --start-period=30s --interval=30s --timeout=10s --retries=3 \ | |
| CMD curl --fail http://localhost:7860/api/v1/health || exit 1 | |
| # Default command: serve the `app` object from module `app` with uvicorn on 0.0.0.0:7860. | |
| # Use multiple workers for better performance | |
| # NOTE(review): uvicorn workers are separate processes; if the app loads the GGUF model in each | |
| # process, memory use scales with the worker count. Confirm two workers fit the available RAM. | |
| CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "2"] |