goodgoals committed on
Commit
73a5921
·
1 Parent(s): f0fb3ea

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +43 -0
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image: slim variant of the official Python 3.10 image
2
+ FROM python:3.10-slim
3
+
4
+ # 1. Install build tools, git, and jemalloc (an allocator that can reduce heap fragmentation); --no-install-recommends keeps optional extras out of the image
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ build-essential \
7
+ libjemalloc-dev \
8
+ git \
9
+ && rm -rf /var/lib/apt/lists/*
10
+
11
+ # Preload jemalloc as the allocator for every process. The library path is hard-coded for x86_64, and the setting also applies to the build steps that follow.
12
+ ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so"
13
+
14
+ # 2. Create a non-root user with UID 1000 (for HF Spaces), switch to it, put ~/.local/bin on PATH, and work from ~/app
15
+ RUN useradd -m -u 1000 user
16
+ USER user
17
+ ENV HOME=/home/user \
18
+ PATH=/home/user/.local/bin:$PATH
19
+ WORKDIR $HOME/app
20
+
21
+ # 3. Install Python dependencies: upgrade pip first, then the training stack, without keeping a pip cache
22
+ # IPEX (intel-extension-for-pytorch) targets Intel CPUs. NOTE(review): every package is unpinned, and IPEX presumably needs a matching torch release — confirm and pin both.
23
+ RUN pip install --no-cache-dir --upgrade pip && \
24
+ pip install --no-cache-dir \
25
+ torch \
26
+ intel-extension-for-pytorch \
27
+ transformers \
28
+ datasets \
29
+ accelerate \
30
+ trl \
31
+ sentencepiece
32
+
33
+ # 4. Copy the build context (training script and local files) into the app directory, owned by the non-root user
34
+ COPY --chown=user . $HOME/app
35
+
36
+ # 5. Set environment variables for CPU threading
37
+ # OpenMP and MKL are capped at 2 threads, which fits a 2-vCPU Space; raise both on larger tiers. NOTE(review): confirm what reads USE_CPU.
38
+ ENV OMP_NUM_THREADS=2 \
39
+ MKL_NUM_THREADS=2 \
40
+ USE_CPU=1
41
+
42
+ # 6. Default command: run train.py with python from the working directory (exec form, no shell)
43
+ CMD ["python", "train.py"]