# DGX_AI / Dockerfile
# Last commit a38ca5e (vasiuuu):
#   fix: patch torch._pytree.register_constant + tighten torchao/transformers pins
# CUDA 12.4 / cuDNN 9 development image tagged for PyTorch 2.6.0.
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel

# System packages have to be installed as root.
USER root
# git is required by the pip step below that installs from a git+https URL.
# ca-certificates is listed explicitly because --no-install-recommends skips
# git's recommended packages, and HTTPS clones need the CA bundle.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account with a fixed UID of 1000; every later step runs as it
# unless a step switches back to root explicitly.
RUN useradd -m -u 1000 user
USER user
# Put the user's ~/.local/bin first on PATH so console scripts from
# user-level pip installs are found.
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /app
# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Install unsloth from GitHub (drags in bleeding-edge torchao).
# --no-cache-dir keeps pip's download/wheel cache out of the image layer,
# consistent with every other pip step in this file.
# NOTE(review): the git URL tracks the repository's default branch, so rebuilds
# are not reproducible; consider appending @<tag-or-commit> once a known-good
# ref has been chosen.
RUN pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Constrain trl and transformers before torchao gets locked further down.
# From 4.48 on, transformers runs `from .quantizer_torchao import ...`
# unconditionally at module init, so a mismatched torchao breaks the import.
# Releases below 4.48 still guard that import, hence the upper bound.
RUN pip install --no-cache-dir \
    "trl>=0.15,<0.17" \
    "transformers>=4.47,<4.48"
# Copy only the dependency manifest into the working directory, owned by the
# unprivileged user, ahead of the rest of the source tree.
COPY --chown=user requirements.txt ./
# Deliberately no --upgrade: pip must not silently re-upgrade pinned packages.
RUN pip install --no-cache-dir --requirement requirements.txt
# CRITICAL: Force-downgrade torchao LAST (after every other install).
# torchao>=0.7 requires torch.utils._pytree.register_constant which does not
# exist in PyTorch 2.6.0. This must be the final pip step. (v2)
# --no-deps: --force-reinstall reinstalls the target's dependencies as well,
# which could disturb the versions settled by the earlier install steps;
# only the torchao package itself should be replaced here.
RUN pip install --no-cache-dir --force-reinstall --no-deps "torchao==0.6.1"
# Copy all the code, owned by the runtime user (and its group) from the start.
COPY --chown=user:user . /app

# Ensure correct permissions for checkpoint saving.
# The copied files and directories already received their ownership from
# COPY --chown, so only the /app directory entry itself needs fixing
# (depending on the builder, WORKDIR may have created it as root).
# A recursive chown would rewrite every copied file into an extra layer.
USER root
RUN chown user:user /app
USER user
# Hugging Face spaces expect the app to run on 7860.
# EXPOSE publishes nothing by itself; it documents the port the server below
# listens on for operators and tooling.
EXPOSE 7860
# Exec-form CMD: uvicorn serves the `app` object from module `app` on all
# interfaces at port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]