# FineTune / Dockerfile
# goodgoals's picture
# Create Dockerfile
# 73a5921
# Base image: slim CPython on Debian.
# The Python version is a build argument (default 3.10, same as before) and can
# be overridden with `--build-arg PYTHON_VERSION=...`. The Debian release is
# pinned to bookworm so the apt packages and the multiarch library path used by
# LD_PRELOAD further down do not change when the floating `-slim` tag is moved
# to a newer distribution.
ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim-bookworm
# 1. System packages (installed as root, before the switch to a non-root user).
#    - build-essential: C/C++ toolchain for Python packages that compile on install
#    - git:             git client
#    - libjemalloc-dev: jemalloc allocator (the original author uses it to limit
#                       RAM fragmentation); the -dev package ships the
#                       unversioned libjemalloc.so that LD_PRELOAD points at
#    --no-install-recommends keeps optional extras out of the image, and the apt
#    package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libjemalloc-dev \
    && rm -rf /var/lib/apt/lists/*
# Preload jemalloc into every process started from this point on, so it takes
# the place of the default allocator (the author considers this crucial for
# CPU-only training). The path is the x86_64 multiarch library directory.
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so
# 2. Non-root account for Hugging Face Spaces: UID 1000 with its own home
#    directory. Every instruction after USER runs as this account.
RUN useradd --create-home --uid 1000 user
USER user
# HOME points at the new account; its ~/.local/bin (where user-level pip
# installs place console scripts) is put first on PATH.
ENV HOME=/home/user
ENV PATH=$HOME/.local/bin:$PATH
# Application directory; later relative paths resolve against it.
WORKDIR /home/user/app
# 3. Python dependencies.
#    IPEX (intel-extension-for-pytorch) is the author's CPU speed-up for the
#    Intel Xeon hosts described as common in HF Spaces. It refuses to load when
#    its major.minor version differs from torch's, so both are pinned to a
#    matching pair. Override with --build-arg and keep the two in step.
#    NOTE(review): confirm the 2.8.0 pair against the IPEX installation guide.
ARG TORCH_VERSION=2.8.0
ARG IPEX_VERSION=2.8.0
#    torch is installed first, from the PyTorch CPU wheel index: the default
#    PyPI wheel for Linux bundles the CUDA runtime libraries, which are dead
#    weight in a CPU-only image. The remaining packages come from PyPI and
#    reuse the torch build that is already installed.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        "torch==${TORCH_VERSION}" \
    && pip install --no-cache-dir \
        accelerate \
        datasets \
        "intel-extension-for-pytorch==${IPEX_VERSION}" \
        sentencepiece \
        transformers \
        trl
# 4. Bring the build context (training script and local files) into the
#    working directory, /home/user/app, owned by the non-root account.
#    Kept after the dependency install so source edits do not invalidate
#    the pip layer.
COPY --chown=user . .
# 5. Runtime environment for CPU execution.
#    OMP_NUM_THREADS / MKL_NUM_THREADS cap the OpenMP and MKL thread pools at 2,
#    sized for the 2-vCPU Space tier (the author also mentions 4-vCPU tiers;
#    raise both values there).
#    USE_CPU=1 is exported for whatever consumes it; the reader is not visible
#    in this file (presumably train.py - confirm).
ENV USE_CPU=1 \
    OMP_NUM_THREADS=2 \
    MKL_NUM_THREADS=2
# 6. Default command: run the training script from the working directory.
#    Exec (JSON) form makes python the container's main process so it receives
#    stop signals directly. -u disables stdout/stderr buffering so training
#    progress reaches the container log immediately rather than in delayed
#    blocks (or not at all if the container is killed mid-run).
CMD ["python", "-u", "train.py"]