bamboo-1 / docker /Dockerfile
rain1024's picture
Initial commit: Vietnamese dependency parser with Biaffine architecture
b85c683
# Dockerfile for Bamboo-1 Vietnamese Dependency Parser Training
# Optimized for RunPod deployment
#
# Build:
# docker build -t bamboo-1:latest -f docker/Dockerfile .
#
# Push to Docker Hub:
# docker tag bamboo-1:latest <username>/bamboo-1:latest
# docker push <username>/bamboo-1:latest
#
# RunPod Usage:
# - Set image to: <username>/bamboo-1:latest
# - Network volume mount: /runpod-volume
# - Models saved to: /runpod-volume/bamboo-1/models (via the ./models symlink)
#
# Training commands:
# uv run scripts/train.py
# uv run scripts/train.py --wandb --wandb-project bamboo-1
# Base image: RunPod's optimized PyTorch build.
# The tag pins CUDA 12.8.1 (cu1281), PyTorch 2.6.0 (torch260) and Ubuntu 22.04.
# It ships with:
#   - Python 3.9-3.13 (default 3.12)
#   - JupyterLab, SSH and NGINX pre-installed
#   - the uv package manager
FROM runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="underthesea" \
      description="Bamboo-1 Vietnamese Dependency Parser - RunPod Training"
# Environment variables
# PYTHONUNBUFFERED=1 makes Python flush stdout/stderr immediately, so training
# logs appear in the container log stream as they are produced.
ENV PYTHONUNBUFFERED=1

# Set working directory (WORKDIR creates it if it does not exist)
WORKDIR /workspace/bamboo-1

# Copy dependency files first (for Docker layer cache): the install layer below
# is only rebuilt when one of these files changes, not on every source edit.
COPY pyproject.toml uv.lock ./
COPY docker/requirements.txt ./

# Install dependencies with uv into the base image's system interpreter.
# Only click and tqdm needed - PyTorch in base, data pre-included.
# --no-cache keeps uv's download cache out of the resulting image layer.
RUN uv pip install --system --no-cache -r requirements.txt
# Copy pre-processed data (UDD-1 CoNLL-U files, ~22MB)
# No need for datasets library at runtime.
# Copied before the source code: the data changes far less often, so source
# edits do not invalidate (and re-push) this larger layer.
COPY data/ ./data/

# Copy project source code (most frequently changing files go last)
COPY bamboo1/ ./bamboo1/
COPY scripts/ ./scripts/
# Create symlink so that ./models points at the RunPod network volume and
# trained models persist across containers.
# The target directory is also created at build time so the link resolves when
# the container runs without a volume attached.
RUN mkdir -p /runpod-volume/bamboo-1/models && \
    ln -sf /runpod-volume/bamboo-1/models models

# Default command - start training.
# A volume mounted at /runpod-volume hides the directory created during the
# build, so the link target is re-created at start-up; without this, ./models
# would be a dangling symlink on a fresh volume.
# `exec` replaces the shell with uv so the training process receives stop
# signals directly.
# NOTE(review): `uv run` in a directory containing pyproject.toml may sync a
# project environment instead of using the packages installed with --system;
# confirm this is the intended behaviour.
CMD ["sh", "-c", "mkdir -p /runpod-volume/bamboo-1/models && exec uv run scripts/train.py"]