# Base image. Pinned to the Debian 12 (bookworm) variant because the Microsoft
# apt repository configured further down targets debian/12 "bookworm"; a
# floating "-slim" tag is not tied to that Debian release.
FROM python:3.11-slim-bookworm
# -----------------------
# ✅ Environment variables
# -----------------------
# Build-time only: keeps apt/debconf non-interactive during the RUN steps of
# this build without persisting the setting into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration baked into the image.
#   CHROMA_ROOT and ENV are read by the start script generated later in this file.
#   CHROMA_DIR and CHROMA_ROOT intentionally carry the same path here.
# NOTE(review): "ENV" is also the variable POSIX sh consults for an interactive
# startup file; harmless for non-interactive use, but confirm the name is intended.
ENV PYTHONUNBUFFERED=1 \
    TESSERACT_CMD=/usr/bin/tesseract \
    CHROMA_DIR=/data/chroma \
    CHROMA_ROOT=/data/chroma \
    RAG_PDF_DIR=/app/pdfs \
    PYTHONPATH=/app \
    ENV=prod
# -----------------------
# 🧩 System dependencies
# -----------------------
# Installs the OS packages, registers Microsoft's apt repository and installs
# the SQL Server ODBC driver (msodbcsql17), all in one layer so the apt lists
# removed at the end never persist in the image.
#
# The signing key is downloaded to a temporary file and dearmored in a second
# step instead of being piped into gpg: the default /bin/sh has no pipefail
# here, so a failed download inside a pipe would not stop the build.
#
# NOTE(review): the repository entry is hardcoded to arch=amd64 and to the
# debian/12 "bookworm" suite; building for another architecture or Debian
# release needs this line adjusted.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        curl \
        ffmpeg \
        gnupg2 \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
        unixodbc \
        unixodbc-dev \
    ; \
    mkdir -p /etc/apt/keyrings; \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc; \
    gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg /tmp/microsoft.asc; \
    rm -f /tmp/microsoft.asc; \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list; \
    apt-get update; \
    ACCEPT_EULA=Y apt-get install -y --no-install-recommends msodbcsql17; \
    mkdir -p /data/chroma; \
    rm -rf /var/lib/apt/lists/*
WORKDIR /app

# -----------------------
# 🧩 Python dependencies
# -----------------------
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN python -m pip install --no-cache-dir --upgrade pip

# requirements.txt is copied on its own, before the application source, so the
# dependency layer is reused until the requirements file itself changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# ✅ Add compatibility fix for embeddings
# Runs after requirements.txt, so these exact pins are the versions that end up
# installed for the two packages.
RUN pip install --no-cache-dir sentence-transformers==2.2.2 huggingface-hub==0.24.5
# -----------------------
# 📦 Copy application code
# -----------------------
# Copies the whole build context into /app. This comes after the dependency
# installs so that source-only changes do not invalidate those layers.
COPY . /app
# -----------------------
# ✅ Auto-ingest script
# -----------------------
# Generates /app/start.sh, the container entrypoint script, and marks it
# executable in the same layer.
#
# What the script does at container start:
#   1. Logs ENV and CHROMA_ROOT (CHROMA_ROOT falls back to /data/chroma if unset).
#   2. Checks ${CHROMA_ROOT}/low, /mid and /high; if any is missing or empty it
#      runs `python -m ragg.ingest_all`. An ingestion failure is logged as a
#      warning and does not stop the container (deliberate best-effort).
#   3. Replaces itself with gunicorn via exec, serving verification:app on port 7860.
#
# The file is written with printf '%s\n' (one argument per script line) rather
# than echo with embedded "\n", so it does not depend on the build shell's echo
# interpreting backslash escapes.
RUN printf '%s\n' \
      '#!/usr/bin/env bash' \
      'set -euo pipefail' \
      'CHROMA_ROOT="${CHROMA_ROOT:-/data/chroma}"' \
      'echo "== Container start =="' \
      'echo "ENV=${ENV:-dev}"' \
      'echo "CHROMA_ROOT=${CHROMA_ROOT}"' \
      'mkdir -p "${CHROMA_ROOT}"' \
      '_need_ingest=0' \
      'for level in low mid high; do' \
      '  lvl_dir="${CHROMA_ROOT}/${level}"' \
      '  if [ ! -d "$lvl_dir" ] || [ -z "$(ls -A "$lvl_dir" 2>/dev/null || true)" ]; then' \
      '    _need_ingest=1' \
      '  fi' \
      'done' \
      'if [ "${_need_ingest}" -eq 1 ]; then' \
      '  echo "No Chroma data found — running ingestion..."' \
      '  python -m ragg.ingest_all || echo "WARNING: ingestion returned non-zero exit"' \
      'else' \
      '  echo "Chroma already present — skipping ingestion."' \
      'fi' \
      'exec gunicorn --workers 2 --threads 4 --timeout 120 -b 0.0.0.0:7860 verification:app' \
      > /app/start.sh \
 && chmod +x /app/start.sh
# Documents the port gunicorn binds to in /app/start.sh (0.0.0.0:7860).
EXPOSE 7860

# -----------------------
# ✅ Final command
# -----------------------
# Exec form, so /app/start.sh runs directly as the container's main process.
CMD ["/app/start.sh"]