# ai-rag / Dockerfile
# robrtt's picture
# Clean rebuild: all features fixed
# 90df99c
# Multimodal AI — unified image for Hugging Face Spaces.
FROM python:3.10-slim

# Telemetry / tracking opt-out switches.
ENV ANONYMIZED_TELEMETRY=False \
    CHROMA_TELEMETRY_ENABLED=False \
    DO_NOT_TRACK=1 \
    HF_HUB_DISABLE_TELEMETRY=1 \
    POSTHOG_DISABLED=1

# Python and pip behaviour. PIP_CONSTRAINT points pip at a constraints file;
# that file has to exist at this path before the first pip command runs.
ENV PIP_CONSTRAINT=/opt/pip-constraints.txt \
    PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1
# System packages (one per line, alphabetical, so diffs stay readable).
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libblas3 \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        liblapack3 \
        libopenblas0 \
        libsm6 \
        libxext6 \
        libxkbcommon0 \
        libxrender1 \
        nginx \
        supervisor \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-ind \
    && rm -rf /var/lib/apt/lists/*
# Global pip constraint: numpy<2 for compatibility with torch 2.1.0+cpu.
# The file is written to the path that PIP_CONSTRAINT refers to.
RUN mkdir -p /opt \
    && echo 'numpy>=1.26,<2' > /opt/pip-constraints.txt
WORKDIR /app

# PyTorch (CPU build), resolved against the PyTorch CPU wheel index.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.1.0+cpu \
        torchvision==0.16.0+cpu

# Pin numpy<2 (torch 2.1.0+cpu is compiled against the numpy 1.x ABI).
RUN pip install --no-cache-dir 'numpy>=1.26,<2'
# RAG dependencies.
# torch, torchvision and numpy are installed by earlier steps, so every entry
# for them is stripped from the requirements file before pip sees it. The
# filter matches the package name followed by any character that cannot be
# part of a package name (or end of line), case-insensitively, so "torch==…",
# "torch>=…", "torch~=…", a bare "torch" and "Torch[extra]" are all removed,
# while names such as "torchaudio" or "numpy-financial" are left alone.
# Matching only "==" pins would let other spellings through to pip.
COPY rag_pipeline/requirements.txt /tmp/rag-requirements.txt
RUN sed -E -i '/^[[:space:]]*(torch|torchvision|numpy)([^[:alnum:]._-]|$)/Id' /tmp/rag-requirements.txt && \
    pip install --no-cache-dir -r /tmp/rag-requirements.txt

# CV dependencies (no transformers — the captioner uses Groq Cloud).
# Same filter as for the RAG requirements.
COPY cv_module/requirements.txt /tmp/cv-requirements.txt
RUN sed -E -i '/^[[:space:]]*(torch|torchvision|numpy)([^[:alnum:]._-]|$)/Id' /tmp/cv-requirements.txt && \
    pip install --no-cache-dir -r /tmp/cv-requirements.txt
# Fail the build if a transitive dependency bumped numpy off the 1.x line.
# The Python code starts on the same line as the opening quote so that the
# indented continuation lines are not read as an unexpected indent.
RUN python -c "import numpy; \
    assert numpy.__version__.startswith('1.'), f'numpy got bumped to {numpy.__version__}'; \
    print(f'[ok] numpy {numpy.__version__}')"
# YOLO ONNX export at build time.
# ultralytics is installed only for the export script and uninstalled again in
# the same layer; the script itself and the yolov8n.pt / yolov8n.onnx files in
# the working directory are deleted afterwards.
# NOTE(review): /app/cv/model_cache is presumably where the script writes the
# exported model — confirm against scripts/export_yolo_onnx.py.
COPY scripts/export_yolo_onnx.py /tmp/export_yolo_onnx.py
RUN pip install --no-cache-dir ultralytics==8.2.0 \
    && mkdir -p /app/cv/model_cache \
    && python /tmp/export_yolo_onnx.py \
    && pip uninstall -y ultralytics \
    && rm -f \
        yolov8n.pt \
        yolov8n.onnx \
        /tmp/export_yolo_onnx.py
# Pre-download CLIP ViT-B-32 (~350MB) and bake it into an image layer so it is
# not downloaded again on cold start.
# Cache locations are set explicitly under /root/.cache so they are not lost
# on HF Spaces.
ENV HF_HOME=/root/.cache/huggingface \
    OPENAI_CLIP_CACHE=/root/.cache/clip \
    TORCH_HOME=/root/.cache/torch
COPY scripts/download_clip.py /tmp/download_clip.py
RUN python /tmp/download_clip.py \
    && rm /tmp/download_clip.py
# Patch chromadb for numpy compatibility. A failure here aborts the build.
COPY scripts/patch_chromadb_numpy2.py /tmp/patch_chromadb_numpy2.py
RUN python /tmp/patch_chromadb_numpy2.py

# Additional library patches, applied on a best-effort basis: if the script
# exits non-zero, a message is printed and the build carries on.
COPY scripts/patch_libs.py /tmp/patch_libs.py
RUN if ! python /tmp/patch_libs.py; then \
        echo "patches finished with warnings"; \
    fi
# Sanity check of the torch + numpy ABI: print both versions, then round-trip
# a small array through torch.from_numpy and check its sum.
# The Python code starts on the same line as the opening quote so that the
# indented continuation lines are not read as an unexpected indent.
RUN python -c "import numpy as np; import torch; \
    print(f'[verify] numpy={np.__version__}'); \
    print(f'[verify] torch={torch.__version__}'); \
    arr = np.array([1.0, 2.0, 3.0]); \
    t = torch.from_numpy(arr); \
    assert t.sum().item() == 6.0; \
    print('[verify] torch <-> numpy interop OK')"
# Process / web-server configuration and the container start script.
COPY nginx.conf /etc/nginx/nginx.conf
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY start.sh /app/start.sh
RUN chmod +x /app/start.sh

# Frontend (single static page).
COPY frontend/index.html /app/frontend/index.html

# Application source code for the RAG pipeline and the CV module.
COPY rag_pipeline/src/ /app/rag/src/
COPY cv_module/src/ /app/cv/src/
# Directories that must exist in the image: data, log and upload locations
# under /app, plus the supervisor log directory and /run.
RUN mkdir -p \
        /app/cv/logs \
        /app/cv/model_cache \
        /app/cv/uploads \
        /app/rag/chroma_db \
        /app/rag/logs \
        /run \
        /var/log/supervisor

# Documented service port and the container entry command (exec form).
EXPOSE 7860
CMD ["/app/start.sh"]