File size: 1,688 Bytes
b85c683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Dockerfile for Bamboo-1 Vietnamese Dependency Parser Training
# Optimized for RunPod deployment
#
# Build:
#   docker build -t bamboo-1:latest -f docker/Dockerfile .
#
# Push to Docker Hub:
#   docker tag bamboo-1:latest <username>/bamboo-1:latest
#   docker push <username>/bamboo-1:latest
#
# RunPod Usage:
#   - Set image to: <username>/bamboo-1:latest
#   - Network volume mount: /runpod-volume
#   - Models saved to: /runpod-volume/bamboo-1/models
#
# Training commands:
#   uv run scripts/train.py
#   uv run scripts/train.py --wandb --wandb-project bamboo-1

# Base image: RunPod's PyTorch build (tag 1.0.2-cu1281-torch260-ubuntu2204).
# Expected contents of this image:
#   - PyTorch 2.6.0 with CUDA 12.8.1
#   - Python 3.9-3.13, defaulting to 3.12
#   - JupyterLab, SSH and NGINX
#   - the uv package manager
FROM runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204

# Image metadata (single instruction, one key per line)
LABEL maintainer="underthesea" \
      description="Bamboo-1 Vietnamese Dependency Parser - RunPod Training"

# Unbuffered Python output, so training logs are emitted as they are written
ENV PYTHONUNBUFFERED=1

# Project directory; every relative path below resolves against it
WORKDIR /workspace/bamboo-1

# Copy only the file the install step actually reads, so that edits to
# pyproject.toml / uv.lock do not invalidate the cached dependency layer.
COPY docker/requirements.txt ./

# Install the extra Python dependencies into the system interpreter with uv.
# requirements.txt is expected to be small (click and tqdm): PyTorch ships
# with the base image and the data is bundled into the image below.
# --no-cache keeps uv's download cache out of the image layer.
RUN uv pip install --system --no-cache -r requirements.txt

# Project metadata is still shipped next to the sources; it is copied after
# the install because the install step above does not read it.
COPY pyproject.toml uv.lock ./

# Copy pre-processed data (UDD-1 CoNLL-U files, ~22MB) before the code.
# The data is presumably the slower-changing input, so placing it first lets
# code-only changes reuse this layer instead of re-creating it.
# No need for datasets library at runtime
COPY data/ ./data/

# Copy project source code last, as it is the part most likely to change
COPY bamboo1/ ./bamboo1/
COPY scripts/ ./scripts/

# Redirect the project's ./models directory to the RunPod network volume so
# that saved models outlive the container.
# NOTE(review): the directory created here lives in the image layer. Once a
# network volume is mounted at /runpod-volume it is hidden by the mount, so the
# link target presumably has to exist on the volume (or be created by the
# training script) - confirm.
RUN mkdir -p /runpod-volume/bamboo-1/models \
 && ln -sf /runpod-volume/bamboo-1/models /workspace/bamboo-1/models

# Default command - start training
# Exec (JSON-array) form: uv is started directly, without a wrapping shell.
# The script path is relative to WORKDIR (/workspace/bamboo-1).
# NOTE(review): `uv run` in a directory that contains pyproject.toml normally
# resolves a project environment rather than the system interpreter that
# `uv pip install --system` populated - confirm that training really uses the
# PyTorch provided by the base image and does not re-install dependencies at
# container start.
CMD ["uv", "run", "scripts/train.py"]