Aloukik21 committed on
Commit
e334188
·
verified ·
1 Parent(s): 54c3d8d

Update Dockerfile with cache optimization

Browse files
Files changed (1) hide show
  1. Dockerfile.runpod +89 -32
Dockerfile.runpod CHANGED
@@ -1,45 +1,102 @@
1
- FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- ENV DEBIAN_FRONTEND=noninteractive
4
- ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0"
5
- ENV PYTHONUNBUFFERED=1
6
 
7
- # HuggingFace settings
8
- ENV HF_HUB_ENABLE_HF_TRANSFER=1
9
- ENV HF_HOME=/runpod-volume/huggingface-cache
10
- ENV HUGGINGFACE_HUB_CACHE=/runpod-volume/huggingface-cache/hub
11
- ENV TRANSFORMERS_CACHE=/runpod-volume/huggingface-cache/hub
 
 
 
 
 
 
 
12
 
13
- # Disable telemetry
14
- ENV NO_ALBUMENTATIONS_UPDATE=1
15
- ENV DISABLE_TELEMETRY=YES
16
 
17
- # Install system deps
18
- RUN apt-get update && apt-get install -y --no-install-recommends \
19
- git git-lfs curl wget python3.10 python3.10-dev python3-pip \
20
- ffmpeg libgl1-mesa-glx libglib2.0-0 aria2 \
21
- && rm -rf /var/lib/apt/lists/*
 
 
 
22
 
23
- RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1
24
- RUN pip install --upgrade pip
 
 
 
25
 
26
- # Install PyTorch
27
- RUN pip install --no-cache-dir torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 \
28
- --index-url https://download.pytorch.org/whl/cu124
 
 
29
 
30
- # Install RunPod and HF transfer
31
- RUN pip install --no-cache-dir runpod hf_transfer huggingface_hub
 
32
 
33
- # Copy project
34
- WORKDIR /app
35
- COPY ai-toolkit /app/ai-toolkit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  COPY rp_handler.py /app/rp_handler.py
37
 
38
- # Install ai-toolkit requirements
39
- RUN pip install --no-cache-dir -r /app/ai-toolkit/requirements.txt
40
 
41
- # Create workspace directories
42
- RUN mkdir -p /workspace/dataset /workspace/output
 
 
43
 
44
- WORKDIR /app
45
  CMD ["python", "-u", "rp_handler.py"]
 
1
+ # =============================================================================
2
+ # AI-Toolkit Trainer - RunPod Serverless Worker (CACHE OPTIMIZED)
3
+ # =============================================================================
4
+ # CACHE OPTIMIZATION: Layers ordered roughly from LEAST to MOST frequently changed
5
+ #
6
+ # Layer Order (top = rarely changes, bottom = frequently changes):
7
+ # 1. Base image + system deps [RARELY CHANGE]
8
+ # 2. PyTorch + CUDA [RARELY CHANGE]
9
+ # 3. AI-Toolkit requirements [OCCASIONALLY CHANGE]
10
+ # 4. RunPod + HF deps [RARELY CHANGE]
11
+ # 5. AI-Toolkit code [OCCASIONALLY CHANGE]
12
+ # 6. Directory setup [RARELY CHANGE]
13
+ # 7. rp_handler.py [FREQUENTLY CHANGE]
14
+ #
15
+ # Build:
16
+ # docker buildx build --platform linux/amd64 -f Dockerfile.runpod \
17
+ # -t aloukikaditya/trainer:latest --push .
18
+ #
19
+ # Build with cache:
20
+ # DOCKER_BUILDKIT=1 docker build -f Dockerfile.runpod -t aio-trainer .
21
+ # =============================================================================
22
 
23
+ ARG BASE_IMAGE=runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04
24
+ FROM ${BASE_IMAGE}
 
25
 
26
+ # -----------------------------------------------------------------------------
27
+ # [LAYER 1] Environment Configuration - RARELY CHANGES
28
+ # -----------------------------------------------------------------------------
29
+ ENV PYTHONUNBUFFERED=1 \
30
+ DEBIAN_FRONTEND=noninteractive \
31
+ HF_HUB_ENABLE_HF_TRANSFER=1 \
32
+ HF_HOME=/runpod-volume/huggingface-cache \
33
+ HUGGINGFACE_HUB_CACHE=/runpod-volume/huggingface-cache/hub \
34
+ TRANSFORMERS_CACHE=/runpod-volume/huggingface-cache/hub \
35
+ NO_ALBUMENTATIONS_UPDATE=1 \
36
+ DISABLE_TELEMETRY=YES \
37
+ TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0"
38
 
39
+ WORKDIR /app
 
 
40
 
41
+ # -----------------------------------------------------------------------------
42
+ # [LAYER 2] System Dependencies - RARELY CHANGES
43
+ # -----------------------------------------------------------------------------
44
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
45
+ --mount=type=cache,target=/var/lib/apt,sharing=locked \
46
+ apt-get update && apt-get install -y --no-install-recommends \
47
+ git git-lfs curl wget ffmpeg libgl1-mesa-glx libglib2.0-0 aria2 \
48
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
49
 
50
+ # -----------------------------------------------------------------------------
51
+ # [LAYER 3] PyTorch (use base image PyTorch or install specific version)
52
+ # -----------------------------------------------------------------------------
53
+ # Base image already has PyTorch, verify it
54
+ RUN python -c "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}')"
55
 
56
+ # -----------------------------------------------------------------------------
57
+ # [LAYER 4] AI-Toolkit Requirements - OCCASIONALLY CHANGES
58
+ # -----------------------------------------------------------------------------
59
+ # Copy only requirements first for better caching
60
+ COPY ai-toolkit/requirements.txt /app/ai-toolkit/requirements.txt
61
 
62
+ RUN --mount=type=cache,target=/root/.cache/pip \
63
+ pip install --upgrade pip && \
64
+ pip install -r /app/ai-toolkit/requirements.txt
65
 
66
+ # -----------------------------------------------------------------------------
67
+ # [LAYER 5] RunPod + HuggingFace Dependencies - RARELY CHANGES
68
+ # -----------------------------------------------------------------------------
69
+ RUN --mount=type=cache,target=/root/.cache/pip \
70
+ pip install runpod hf_transfer huggingface_hub
71
+
72
+ # -----------------------------------------------------------------------------
73
+ # [LAYER 6] AI-Toolkit Code - OCCASIONALLY CHANGES
74
+ # -----------------------------------------------------------------------------
75
+ COPY ai-toolkit/ /app/ai-toolkit/
76
+
77
+ # Smoke-check that Python still runs after the copy (no ai-toolkit module is imported here)
78
+ RUN python -c "import sys; sys.path.insert(0, '/app/ai-toolkit'); print('AI-Toolkit ready')"
79
+
80
+ # -----------------------------------------------------------------------------
81
+ # [LAYER 7] Directory Setup - RARELY CHANGES
82
+ # -----------------------------------------------------------------------------
83
+ RUN mkdir -p \
84
+ /workspace/dataset \
85
+ /workspace/output \
86
+ /runpod-volume/huggingface-cache/hub
87
+
88
+ # -----------------------------------------------------------------------------
89
+ # [LAYER 8] Handler Code - FREQUENTLY CHANGES
90
+ # -----------------------------------------------------------------------------
91
+ # This layer is last so changes to handler don't invalidate ai-toolkit cache
92
  COPY rp_handler.py /app/rp_handler.py
93
 
94
+ # Verify handler imports work
95
+ RUN python -c "from rp_handler import handler, MODEL_PRESETS; print(f'Handler ready: {list(MODEL_PRESETS.keys())}')"
96
 
97
+ # -----------------------------------------------------------------------------
98
+ # Runtime Configuration
99
+ # -----------------------------------------------------------------------------
100
+ EXPOSE 8000
101
 
 
102
  CMD ["python", "-u", "rp_handler.py"]