Spaces:
Running
Running
File size: 2,900 Bytes
# =============================================================================
# SAP RPT-1 Benchmarking - Multi-stage Dockerfile
# =============================================================================
# Builds two targets:
# - sap-rpt1: Python 3.11 with SAP RPT-1 OSS + all dependencies
# - baselines: Python 3.11 with XGBoost, CatBoost, LightGBM
#
# Usage:
# docker-compose build
# docker-compose run sap-rpt1
# docker-compose run baselines
# =============================================================================
# ---------- Base stage (parent image of every target below) ----------
FROM python:3.11-slim AS base

# OS-level packages; the apt package lists are removed in the same layer so
# they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Bring in only the requirements file at this point so that later source
# changes do not invalidate the dependency layers (Docker layer caching).
COPY requirements.txt ./
# ---------- SAP RPT-1 target ----------
FROM base AS sap-rpt1

# Step 1: core scientific stack (the heavy packages), kept in its own layer.
RUN pip install --no-cache-dir --retries 5 --default-timeout=1000 \
        matplotlib==3.9.2 \
        numpy==1.26.4 \
        pandas==2.2.3 \
        scikit-learn==1.6.1 \
        scipy==1.14.1 \
        seaborn==0.13.2

# Step 2: Hugging Face + PyTorch stack. The extra index is the PyTorch CPU
# wheel index, which is where the "+cpu" build of torch is resolved from.
RUN pip install --no-cache-dir --retries 5 --default-timeout=1000 \
        --extra-index-url https://download.pytorch.org/whl/cpu \
        accelerate==1.6.0 \
        datasets==3.5.0 \
        huggingface-hub==0.30.2 \
        pyarrow==20.0.0 \
        torch==2.7.0+cpu \
        torcheval==0.0.7 \
        transformers==4.52.4

# Step 3: SAP RPT-1 and whatever else requirements.txt lists.
RUN pip install --no-cache-dir --retries 5 --default-timeout=1000 \
        --requirement requirements.txt

# Project sources are copied last so code edits do not rebuild the layers above.
COPY . /app

# Make /app/code importable and run from there.
ENV PYTHONPATH=/app/code
WORKDIR /app/code

# ENTRYPOINT is the interpreter; CMD holds the default arguments, so any
# arguments given to `docker run` replace CMD and are passed straight to python.
ENTRYPOINT ["python"]
CMD ["-m", "runners.run_experiment", \
     "--dataset", "adult", \
     "--model", "sap-rpt1-hf"]
# ---------- Baselines target ----------
FROM base AS baselines

# Install core scientific stack (heavy packages)
RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
        numpy==1.26.4 \
        pandas==2.2.3 \
        scikit-learn==1.6.1 \
        scipy==1.14.1

# Install visualization and utilities
RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
        joblib==1.4.2 \
        matplotlib==3.9.2 \
        python-dotenv==1.0.1 \
        pyyaml==6.0.2 \
        seaborn==0.13.2 \
        tqdm==4.67.1

# Gradient-boosting library versions. These were previously installed unpinned,
# so the image (and therefore benchmark numbers) depended on the build date.
# They are now pinned like every other package in this file; override at build
# time with e.g. `--build-arg XGBOOST_VERSION=...` when a different release is
# needed.
# NOTE(review): confirm these defaults match the versions used for any
# previously published results before relying on them for comparisons.
ARG XGBOOST_VERSION=2.1.4
ARG CATBOOST_VERSION=1.2.7
ARG LIGHTGBM_VERSION=4.6.0

# Install ML frameworks and OpenML
RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
        openml==0.14.2 \
        "xgboost==${XGBOOST_VERSION}" \
        "catboost==${CATBOOST_VERSION}" \
        "lightgbm==${LIGHTGBM_VERSION}"

# Copy project code (after dependency layers, so code edits keep them cached)
COPY . /app

# Set Python path
ENV PYTHONPATH=/app/code
WORKDIR /app/code

# ENTRYPOINT is the interpreter; CMD holds the default arguments, so any
# arguments given to `docker run` replace CMD and are passed straight to python.
ENTRYPOINT ["python"]
CMD ["-m", "runners.run_batch", "--datasets", "config/datasets.yaml", "--models", "config/models.yaml"]
|