File size: 2,604 Bytes
6d37fa5
8ccf7d6
6136705
 
 
02578b1
 
 
8ccf7d6
6136705
 
 
 
6d37fa5
6136705
 
 
fca6db2
 
 
8ccf7d6
 
 
 
 
 
035de6f
8ccf7d6
 
 
035de6f
fca6db2
8ccf7d6
 
fca6db2
 
02578b1
fca6db2
6d37fa5
 
02578b1
6136705
 
 
02578b1
6d37fa5
 
02578b1
3d432d9
 
 
6136705
 
 
6d37fa5
 
6136705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d37fa5
02578b1
6136705
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Pin the Debian release explicitly: the Microsoft apt repository configured
# further down is hard-coded to debian/12 (bookworm), so the base image must
# stay on bookworm even when the floating "slim" tag moves to a newer release.
FROM python:3.11-slim-bookworm

# -----------------------
# ✅ Environment variables
# -----------------------
# DEBIAN_FRONTEND is only needed by apt-get while the image is built. Declaring
# it as a build ARG keeps it visible to the RUN steps of this stage without
# baking it into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration. ENV and CHROMA_ROOT are read by /app/start.sh; the
# remaining variables are presumably consumed by the application code
# (not visible in this file).
ENV PYTHONUNBUFFERED=1 \
    TESSERACT_CMD=/usr/bin/tesseract \
    CHROMA_DIR=/data/chroma \
    CHROMA_ROOT=/data/chroma \
    RAG_PDF_DIR=/app/pdfs \
    PYTHONPATH=/app \
    ENV=prod

# -----------------------
# 🧩 System dependencies
# -----------------------
# Install OS packages and the Microsoft ODBC driver for SQL Server in a single
# layer, removing the apt lists in that same layer so they never reach the image.
#
# The Microsoft signing key is downloaded to a temporary file and dearmored
# from there instead of being piped into gpg: the default /bin/sh has no
# pipefail, so a failed curl inside a pipe would go unnoticed, whereas as a
# standalone command it aborts the build through `set -e`.
#
# /data/chroma is created here so the default CHROMA_DIR / CHROMA_ROOT path
# exists in the image.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        curl \
        ffmpeg \
        gnupg2 \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
        unixodbc \
        unixodbc-dev \
    ; \
    mkdir -p /etc/apt/keyrings; \
    curl -fsSL -o /tmp/microsoft.asc https://packages.microsoft.com/keys/microsoft.asc; \
    gpg --dearmor -o /etc/apt/keyrings/microsoft.gpg /tmp/microsoft.asc; \
    rm -f /tmp/microsoft.asc; \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/mssql-release.list; \
    apt-get update; \
    ACCEPT_EULA=Y apt-get install -y --no-install-recommends msodbcsql17; \
    mkdir -p /data/chroma; \
    rm -rf /var/lib/apt/lists/*

# Relative paths in the following COPY/RUN instructions resolve against /app,
# and it is the working directory of the container's start command.
WORKDIR /app

# -----------------------
# 🧩 Python dependencies
# -----------------------
# Upgrade pip, then install the application requirements.
# requirements.txt is copied on its own, before the rest of the source tree,
# so this dependency layer is reused from cache until that file changes.
# --no-cache-dir on both pip calls keeps pip's download cache out of the layers.
RUN python -m pip install --no-cache-dir --upgrade pip
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# ✅ Add compatibility fix for embeddings
# Installed after requirements.txt, so these two exact versions replace any
# other versions of the same packages installed by the previous step.
RUN pip install --no-cache-dir \
        huggingface-hub==0.24.5 \
        sentence-transformers==2.2.2

# -----------------------
# 📦 Copy application code
# -----------------------
# Copy the whole build context into the working directory (/app). Anything not
# excluded by a .dockerignore file ends up in the image.
COPY . ./

# -----------------------
# ✅ Auto-ingest script
# -----------------------
# Generate the container start script and mark it executable in one layer.
#
# printf '%s\n' writes every argument verbatim as one script line, so the
# result does not depend on the build shell's `echo` expanding "\n" escapes
# (dash does, bash does not).
#
# At container start the script:
#   1. resolves CHROMA_ROOT (falling back to /data/chroma when unset or empty),
#      exports it for the child processes and creates the directory;
#   2. runs `python -m ragg.ingest_all` when any of the low/mid/high
#      sub-directories is missing or empty; a failing ingestion only logs a
#      warning and the server is still started;
#   3. replaces the shell with gunicorn via `exec`, serving verification:app
#      on 0.0.0.0:7860.
RUN printf '%s\n' \
    '#!/usr/bin/env bash' \
    'set -euo pipefail' \
    'echo "== Container start =="' \
    'echo "ENV=${ENV:-dev}"' \
    'export CHROMA_ROOT="${CHROMA_ROOT:-/data/chroma}"' \
    'echo "CHROMA_ROOT=${CHROMA_ROOT}"' \
    'mkdir -p "${CHROMA_ROOT}"' \
    '_need_ingest=0' \
    'for level in low mid high; do' \
    '  lvl_dir="${CHROMA_ROOT}/${level}"' \
    '  if [ ! -d "$lvl_dir" ] || [ -z "$(ls -A "$lvl_dir" 2>/dev/null || true)" ]; then' \
    '    _need_ingest=1' \
    '  fi' \
    'done' \
    'if [ "${_need_ingest}" -eq 1 ]; then' \
    '  echo "No Chroma data found → running ingestion..."' \
    '  python -m ragg.ingest_all || echo "WARNING: ingestion returned non-zero exit"' \
    'else' \
    '  echo "Chroma already present → skipping ingestion."' \
    'fi' \
    'exec gunicorn --workers 2 --threads 4 --timeout 120 -b 0.0.0.0:7860 verification:app' \
    > /app/start.sh \
    && chmod +x /app/start.sh

# Documents the port gunicorn binds to in /app/start.sh (0.0.0.0:7860).
# EXPOSE is metadata only; it does not publish the port by itself.
EXPOSE 7860

# -----------------------
# ✅ Final command
# -----------------------
# Exec-form CMD: the generated start script is executed directly, without a
# wrapping `/bin/sh -c`.
CMD ["/app/start.sh"]