"""Build and push the bee-train-online-tpu Kaggle kernel from local source.

Sister script to scripts/push_kaggle_kernel.py: same workflow, different
kernel ID, different runner source (workers/kaggle-tpu-train/train.py),
different accelerator (TpuV6E8). Both kernels can run concurrently on
distinct Kaggle quota pools (30h/week GPU vs 20h/week TPU).

Source of truth: workers/kaggle-tpu-train/train.py (content between
`# === KAGGLE-PASTE START ===` and `# === KAGGLE-PASTE END ===`).

Pre-flight guard: refuses to push if the kernel is already running or
queued (unless --force). Same lesson as the GPU script.
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
SOURCE = REPO_ROOT / "workers/kaggle-tpu-train/train.py"
PUSH_DIR = Path("/tmp/bee-kaggle-tpu-push")
# Distinct kernel ID so it runs alongside the GPU kernel without colliding
# in the user's kernel list. Same secrets dataset attaches.
KERNEL_ID = "ceocxx/bee-train-online-tpu"
SECRETS_DATASET = "ceocxx/bee-secrets"


def kernel_status(kernel_id: str) -> str:
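    """Return the kernel's current worker status in lowercase (e.g. 'running',
    'queued'), or '' when it cannot be determined."""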
try:
res = subprocess.run(
["kaggle", "kernels", "status", kernel_id],
capture_output=True, text=True, timeout=30, check=False,
)
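        # The CLI reports a line like: ... has status "KernelWorkerStatus.RUNNING";
        # extract the enum name and normalize to lowercase.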
m = re.search(r'status\s+"KernelWorkerStatus\.([A-Z_]+)"', res.stdout)
return m.group(1).lower() if m else ""
except Exception:
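        # Missing CLI, timeout, or auth failure: treat the status as unknown.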
return ""
def main() -> None:
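    """Parse flags, run the duplicate-session guard, then build and push."""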
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--force",
action="store_true",
help="Push even if the kernel is currently running/queued (use with care).",
)
args = parser.parse_args()
status = kernel_status(KERNEL_ID)
if status in {"running", "queued"} and not args.force:
print(
f"[refuse] {KERNEL_ID} status={status!r} β pushing now would create "
f"a duplicate session and waste Kaggle quota.\n"
f" Use --force to override, or wait for the current run "
f"to finish (the cron will pick up automatically).",
file=sys.stderr,
)
sys.exit(2)
if status:
print(f"[ok] {KERNEL_ID} status={status!r} β proceeding to push.")
src = SOURCE.read_text(encoding="utf-8")
m = re.search(r"# === KAGGLE-PASTE START ===\n(.*?)# === KAGGLE-PASTE END ===", src, re.DOTALL)
if not m:
sys.exit("paste markers not found in workers/kaggle-tpu-train/train.py")
cell_source = m.group(1).rstrip() + "\n"
PUSH_DIR.mkdir(parents=True, exist_ok=True)
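    # Assemble a minimal one-cell notebook; Kaggle reads the accelerator and
    # data sources from the embedded "kaggle" metadata block below.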
nb = {
"metadata": {
"kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"},
"language_info": {
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"version": "3.12",
"file_extension": ".py",
"codemirror_mode": {"name": "ipython", "version": 3},
"name": "python",
"mimetype": "text/x-python",
},
"kaggle": {
# Kaggle's TPU offering as of 2026-05 is v6e-8 (8 cores).
"accelerator": "TpuV6E8",
"dataSources": [{"sourceType": "datasetVersion", "datasetId": SECRETS_DATASET}],
"isInternetEnabled": True,
"language": "python",
"sourceType": "notebook",
# `isGpuEnabled` stays false for TPU kernels; Kaggle infers
# the accelerator from the metadata above.
"isGpuEnabled": False,
},
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{"cell_type": "code", "source": cell_source, "metadata": {"trusted": True},
"outputs": [], "execution_count": None}
],
}
(PUSH_DIR / "bee-train-online-tpu.ipynb").write_text(json.dumps(nb), encoding="utf-8")
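
    # kernel-metadata.json is what `kaggle kernels push` reads from the push
    # directory: kernel ID, accelerator toggles, and attached datasets.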
meta = {
"id": KERNEL_ID,
"title": "bee-train-online-tpu",
"code_file": "bee-train-online-tpu.ipynb",
"language": "python",
"kernel_type": "notebook",
"is_private": True,
"enable_gpu": False,
"enable_tpu": True,
"enable_internet": True,
"keywords": [],
"dataset_sources": [SECRETS_DATASET],
"kernel_sources": [],
"competition_sources": [],
"model_sources": [],
}
(PUSH_DIR / "kernel-metadata.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
print(f"wrote {PUSH_DIR}/bee-train-online-tpu.ipynb ({len(cell_source)} chars in cell)")
print(f"dataset_sources: [{SECRETS_DATASET}]")
    # Force TPU v6e-8 explicitly; same lesson as the GPU side, where the
# CLI silently fell back to the default if the accelerator string
# didn't match Kaggle's expected enum. `TpuV6E8` is the documented
# Kaggle API value as of 2026-05.
res = subprocess.run(
["kaggle", "kernels", "push", "-p", str(PUSH_DIR),
"--accelerator", "TpuV6E8"],
capture_output=True, text=True,
)
print(res.stdout.strip())
if res.returncode != 0:
print(res.stderr.strip(), file=sys.stderr)
        sys.exit(res.returncode)


if __name__ == "__main__":
main()