# Dockerfile for Bamboo-1 Vietnamese Dependency Parser Training
# Optimized for RunPod deployment
#
# Build:
#   docker build -t bamboo-1:latest -f docker/Dockerfile .
#
# Push to Docker Hub:
#   docker tag bamboo-1:latest <username>/bamboo-1:latest
#   docker push <username>/bamboo-1:latest
#
# RunPod Usage:
#   - Set image to: <username>/bamboo-1:latest
#   - Network volume mount: /runpod-volume
#   - Models saved to: /runpod-volume/bamboo-1/models
#
# Training commands:
#   uv run scripts/train.py
#   uv run scripts/train.py --wandb --wandb-project bamboo-1
# Base image: RunPod's PyTorch build
#   - PyTorch 2.6.0 on CUDA 12.8.1
#   - Python 3.9-3.13 available (3.12 is the default)
#   - Ships with JupyterLab, SSH and NGINX
#   - Bundles the uv package manager
FROM runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204

# Image metadata
LABEL maintainer="underthesea" \
      description="Bamboo-1 Vietnamese Dependency Parser - RunPod Training"

# Send Python stdout/stderr straight to the container log without buffering
ENV PYTHONUNBUFFERED=1
# All relative paths below resolve against the project directory
WORKDIR /workspace/bamboo-1

# Copy only the dependency manifests first so the install layer below stays
# cached until one of them changes
COPY pyproject.toml uv.lock docker/requirements.txt ./

# Install the extra runtime dependencies into the base image's Python.
# Only click and tqdm are needed: PyTorch ships with the base image and the
# training data is copied in pre-processed.
# --no-cache keeps uv's download cache out of the image layer.
RUN uv pip install --system --no-cache -r requirements.txt
# Copy pre-processed data (UDD-1 CoNLL-U files, ~22MB) before the source so
# that code edits do not invalidate this larger, presumably rarely changing
# layer. No need for the datasets library at runtime.
COPY data/ ./data/

# Copy project source code
COPY bamboo1/ ./bamboo1/
COPY scripts/ ./scripts/
# Point ./models at the RunPod network volume so saved models persist.
# The target directory is also created here so the link resolves when the
# image runs without a volume attached; -n replaces an existing link instead
# of descending into it.
RUN mkdir -p /runpod-volume/bamboo-1/models && \
    ln -sfn /runpod-volume/bamboo-1/models models

# Default command - start training.
# A volume mounted at /runpod-volume hides the directory created at build
# time, which would leave ./models dangling on a fresh volume, so the target
# is re-created at start-up. exec then replaces the shell with uv so the
# training process receives signals directly.
# NOTE(review): uv run resolves its environment from pyproject.toml/uv.lock,
# while the dependencies above are installed with --system - confirm that
# this is the intended interpreter/environment.
CMD ["sh", "-c", "mkdir -p /runpod-volume/bamboo-1/models && exec uv run scripts/train.py"]