#!/usr/bin/env bash
# =============================================================================
# LUNA 100M — SFT Cloud Setup & Train Entrypoint
# Runs on RunPod, Vast.ai, Lambda Labs, or any Linux GPU pod.
#
# USAGE:
#   bash setup_and_sft.sh [huggingface|local] [HF_REPO_OR_PATH]
#
# EXAMPLES:
#   # From HuggingFace (recommended):
#   bash setup_and_sft.sh huggingface ASTERIZER/LUNA
#
#   # Already cloned locally:
#   bash setup_and_sft.sh local /workspace/LUNA
#
# ENVIRONMENT:
#   HF_TOKEN  Optional. Handed to huggingface_hub.snapshot_download when the
#             plain `git clone` of the Space fails.
# =============================================================================
set -euo pipefail

# Checkout location used in "huggingface" mode.
readonly DEFAULT_WORK_DIR="/workspace/LUNA"

# Directory the training run executes in; set by fetch_repository.
WORK_DIR="$DEFAULT_WORK_DIR"

# Print a message to stderr and abort with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print usage to stderr and abort with status 2.
usage() {
  printf 'Usage: bash %s [huggingface|local] [HF_REPO_OR_PATH]\n' "${0##*/}" >&2
  exit 2
}

#######################################
# Install the Python packages the probe and training steps import.
# The package install is best-effort (the pod image may already ship them),
# but a failure is reported instead of being silently discarded.
#######################################
install_dependencies() {
  echo ""
  echo "[1/4] Installing dependencies..."
  pip install -q --upgrade pip
  if ! pip install -q \
    torch torchvision \
    psutil \
    pyyaml \
    transformers \
    huggingface_hub \
    datasets; then
    echo "  warning: pip install failed; continuing with whatever is already installed" >&2
  fi
  echo " Done."
}

#######################################
# Download a HF Space snapshot via huggingface_hub.
# Values travel through the environment rather than being spliced into the
# Python source, so quotes in them cannot break the code and the token does
# not show up in the process argument list.
# Arguments: $1 - repo id, $2 - destination directory
#######################################
download_snapshot() {
  local repo_id=$1
  local dest=$2
  LUNA_REPO_ID="$repo_id" LUNA_WORK_DIR="$dest" HF_TOKEN="${HF_TOKEN:-}" \
    python - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["LUNA_REPO_ID"],
    repo_type="space",
    local_dir=os.environ["LUNA_WORK_DIR"],
    token=os.environ.get("HF_TOKEN") or None,
)
PY
}

#######################################
# Resolve the working directory for the requested source and cd into it.
# Globals:   WORK_DIR (written)
# Arguments: $1 - "huggingface" or "local"
#            $2 - HF Space repo id (huggingface) or directory path (local)
#######################################
fetch_repository() {
  local source_mode=$1
  local repo_or_path=$2

  echo ""
  echo "[2/4] Fetching repository..."

  case "$source_mode" in
    huggingface)
      WORK_DIR="$DEFAULT_WORK_DIR"
      if [[ ! -d "$WORK_DIR" ]]; then
        # Clone the HF Space as a git repo; fall back to a snapshot download.
        if ! git clone "https://huggingface.co/spaces/${repo_or_path}" "$WORK_DIR"; then
          download_snapshot "$repo_or_path" "$WORK_DIR"
        fi
      else
        echo " $WORK_DIR already exists, pulling latest..."
        # A failed pull is tolerated: the existing tree is used as-is.
        if ! git -C "$WORK_DIR" pull; then
          echo " (not a git repo, using existing)"
        fi
      fi
      ;;
    local)
      WORK_DIR="$repo_or_path"
      [[ -d "$WORK_DIR" ]] || die "local path '$WORK_DIR' is not a directory"
      ;;
    *)
      printf 'error: unknown source "%s"\n' "$source_mode" >&2
      usage
      ;;
  esac

  cd "$WORK_DIR" || die "cannot cd to $WORK_DIR"
  echo " Working dir: $(pwd)"
}

#######################################
# Print GPU, RAM and CPU information using torch and psutil.
# Fails (and, under `set -e`, aborts the script) if either import fails.
#######################################
probe_system() {
  echo ""
  echo "[3/4] System probe..."
  python - <<'PY'
import os

import psutil
import torch

if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print(f" GPU : {props.name} ({props.total_memory/1024**3:.1f} GB)")
else:
    print(" GPU: None")
print(f" RAM : {psutil.virtual_memory().total/1024**3:.1f} GB")
print(f" CPUs : {os.cpu_count()}")
PY
}

#######################################
# Report which of the expected files are present in the current directory.
# Missing files are reported but do not abort the run.
#######################################
check_required_files() {
  local required
  echo ""
  echo " Checking required files..."
  for required in sft_train.py sft_config.yaml Base/Datasets/sft_clean/train.json; do
    if [[ -f "$required" ]]; then
      echo " ✓ $required"
    else
      echo " ✗ $required MISSING!"
    fi
  done
  echo " (Pretrained checkpoint will be auto-downloaded from HuggingFace if not present)"
}

# Launch SFT training from the current directory.
run_training() {
  echo ""
  echo "[4/4] Starting SFT training..."
  echo ""
  python sft_train.py \
    --config sft_config.yaml
  echo ""
  echo "=========================================="
  echo " SFT complete! Output: Base/out/sft/luna_100m_sft"
  echo "=========================================="
}

main() {
  local data_source="${1:-huggingface}"
  local repo_id="${2:-ASTERIZER/LUNA}"

  echo "=========================================="
  echo " LUNA 100M — SFT Setup"
  echo " Source: $data_source"
  echo " Repo : $repo_id"
  echo "=========================================="

  # ── 1. Python packages ──────────────────────────────────────────────────
  install_dependencies
  # ── 2. Clone repo / fetch data ──────────────────────────────────────────
  fetch_repository "$data_source" "$repo_id"
  # ── 3. System probe ─────────────────────────────────────────────────────
  probe_system
  check_required_files
  # ── 4. Train SFT ────────────────────────────────────────────────────────
  run_training
}

main "$@"