FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel

# Create the user for HF Spaces (the container runs as uid 1000).
# `|| true` is deliberate best-effort: the build continues even if useradd
# fails (presumably to tolerate a base image that already has this uid/name —
# confirm before tightening).
RUN useradd -m -u 1000 user || true
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Set working directory (created root-owned; ownership is fixed further down).
WORKDIR /app

# Copy requirements and install them before the rest of the sources, so this
# layer stays cached until requirements.txt itself changes.
# requirements.txt already declares python-docx>=1.1 and hypothesis>=6.100
# (lower bounds), which are required by Dataset v2 tooling
# (document_cutter, PBT tests).
COPY --chown=1000:1000 requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy all files (training/* plus scripts/* when the build context is the
# repo root). entrypoint.sh lands at /app/entrypoint.sh.
# --chown hands the files to uid 1000 at copy time, which avoids a later
# recursive chown that would duplicate every copied file in another layer.
COPY --chown=1000:1000 . .

# Make the dispatch entrypoint executable and fix the remaining permissions.
# The script selects between serve / train / train_v2 / filter_dataset_v2 /
# evaluate based on the ENTRYPOINT_MODE environment variable (see
# training/entrypoint.sh for details). Default is "serve" to preserve the
# existing HF Space inference behaviour.
# /app itself was created by WORKDIR as root, so it is chowned explicitly
# (non-recursively); the home directory is chowned recursively as before.
# NOTE(review): assumes the pip step above leaves nothing root-owned under
# /app (e.g. no editable VCS requirements checked out into ./src) — confirm.
RUN chmod +x /app/entrypoint.sh && \
    chown 1000:1000 /app && \
    chown -R 1000:1000 /home/user

# Drop root for everything that follows, including the running container.
USER 1000

# Set cache dirs to writable locations under the user's home.
ENV HF_HOME=/home/user/.cache/huggingface \
    TORCH_HOME=/home/user/.cache/torch

# Default to the inference server. Override by setting
# ENTRYPOINT_MODE=train_v2 (or another supported value) on the HF Space.
CMD ["/app/entrypoint.sh"]