# Container image for the API served by uvicorn (src.api:app) on port 7860.
# Dependency layers come first so they stay cached when only the
# application code or data changes.
FROM python:3.11-slim

WORKDIR /app

# JAX memory management — don't preallocate all GPU memory at startup
ENV XLA_PYTHON_CLIENT_PREALLOCATE=false \
    XLA_PYTHON_CLIENT_MEM_FRACTION=0.95

# System packages (one per line, sorted). curl is required by the
# HEALTHCHECK at the bottom of this file. The apt lists are removed in the
# same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libeccodes0 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Install JAX with CUDA 12 support (separate layer for caching)
# NOTE(review): jax and torch are unpinned here; consider pinning versions
# for reproducible builds.
RUN pip install --no-cache-dir "jax[cuda12]"

# Install PyTorch CPU-only (sentence-transformers/RAG depends on it)
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Install remaining Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create the unprivileged runtime user. This must happen before the
# COPY --chown instructions below so the user name can be resolved.
RUN adduser --disabled-password --gecos '' appuser

# Copy application code and data directly with appuser ownership, so no
# later recursive chown has to rewrite (and duplicate) these files in
# another layer.
COPY --chown=appuser:appuser config.py .
COPY --chown=appuser:appuser src/ src/
COPY --chown=appuser:appuser models/ models/
COPY --chown=appuser:appuser scripts/ scripts/
COPY --chown=appuser:appuser data/era5land_dar_es_salaam.json data/era5land_dar_es_salaam.json
COPY --chown=appuser:appuser data/landsat_lst/ data/landsat_lst/

# Build RAG index at image build time (run as root, as before). In the same
# layer, hand over only the paths under /app that are still owned by root
# (/app itself, requirements.txt, and whatever the script wrote there);
# --from=root leaves the already-appuser-owned files untouched.
RUN python scripts/build_rag_index.py \
    && chown -R --from=root appuser:appuser /app

# Drop privileges for runtime.
USER appuser

EXPOSE 7860

# -f: fail on HTTP errors; -sS: no progress meter, but still print errors.
HEALTHCHECK --interval=30s --timeout=10s --retries=5 --start-period=60s \
    CMD curl -fsS http://localhost:7860/health || exit 1

CMD ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860"]