# Dockerfile for Bamboo-1 Vietnamese Dependency Parser Training
# Optimized for RunPod deployment
#
# Build:
# docker build -t bamboo-1:latest -f docker/Dockerfile .
#
# Push to Docker Hub:
# docker tag bamboo-1:latest <username>/bamboo-1:latest
# docker push <username>/bamboo-1:latest
#
# RunPod Usage:
# - Set image to: <username>/bamboo-1:latest
# - Network volume mount: /runpod-volume
# - Models saved to: /runpod-volume/bamboo-1/models (target of the ./models symlink)
#
# Training commands:
# uv run scripts/train.py
# uv run scripts/train.py --wandb --wandb-project bamboo-1
# Base image: RunPod's optimized PyTorch build, which provides
# - PyTorch 2.6.0 + CUDA 12.8.1
# - Python 3.9-3.13 (default 3.12)
# - JupyterLab, SSH, NGINX pre-installed
# - uv package manager included
FROM runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204

# Image metadata, declared in a single LABEL instruction
LABEL maintainer="underthesea" \
      description="Bamboo-1 Vietnamese Dependency Parser - RunPod Training"
# Environment variables
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr unbuffered, so training
# logs show up immediately in the container log stream.
ENV PYTHONUNBUFFERED=1

# Set working directory (WORKDIR creates it if it does not exist)
WORKDIR /workspace/bamboo-1

# Copy dependency files first (for Docker layer cache): the install layer
# below is only rebuilt when one of these files changes.
COPY pyproject.toml uv.lock ./
COPY docker/requirements.txt ./

# Install dependencies with uv into the system Python environment
# Only click and tqdm needed - PyTorch in base, data pre-included
# --no-cache keeps uv's download/build cache out of the image layer.
RUN uv pip install --system --no-cache -r requirements.txt
# Copy pre-processed data (UDD-1 CoNLL-U files, ~22MB)
# No need for datasets library at runtime
# Copied before the source code so that code-only edits do not invalidate
# this larger layer (assumes the data changes less often than the code).
COPY data/ ./data/

# Copy project source code
COPY bamboo1/ ./bamboo1/
COPY scripts/ ./scripts/
# Create symlink so ./models points at the RunPod network volume.
# The target directory is also created at build time so the link resolves
# when no volume is mounted. -n replaces an existing `models` link instead of
# descending into the directory it points to.
RUN mkdir -p /runpod-volume/bamboo-1/models && \
    ln -sfn /runpod-volume/bamboo-1/models models

# Default command - start training
# A volume mounted at /runpod-volume hides the directory created at build
# time, which would leave ./models dangling, so the target is (re)created at
# container start. `exec` replaces the shell so the training process receives
# signals directly.
# NOTE(review): dependencies are installed with `uv pip install --system`,
# while `uv run` inside a project directory (pyproject.toml + uv.lock are
# copied here) manages its own project environment - confirm this is the
# intended interpreter.
CMD ["sh", "-c", "mkdir -p /runpod-volume/bamboo-1/models && exec uv run scripts/train.py"]
|