#!/usr/bin/env bash
#
# GPU setup and training launcher for the LUNA 100M LoRA SFT (RAG/MCP) run.
#
# Steps performed by this script:
#   1. install system packages (git, git-lfs, pip)
#   2. download the training code from Hugging Face
#   3. install Python dependencies
#   4. download the SFT base checkpoint
#   5. download the RAG/MCP dataset
#   6. run lora_sft_train.py, then optionally upload the adapter
#
# Required environment:
#   HF_TOKEN       Hugging Face access token.
# Optional environment:
#   UPLOAD_TO_HF   Set to 1 to upload the trained adapter after training.

# Stop on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Abort immediately, with this message, when the token is unset or empty.
: "${HF_TOKEN:?Set HF_TOKEN env var}"

# Local working directory and the Hugging Face repositories used below.
WORK_DIR="/workspace/luna"
CODE_REPO="ASTERIZER/LUNA-Training"
MODEL_REPO="ASTERIZER/LUNA-100M"
DATASET_REPO="ASTERIZER/LUNA-RAG-MCP-SFT-10M"

# Print the start-up banner framed by two rule lines.
banner_rule="============================================================"
printf '%s\n' \
  "$banner_rule" \
  " LUNA 100M β LoRA SFT (RAG/MCP) β GPU Setup" \
  "$banner_rule"

# [1/6] System packages: git, git-lfs and pip.
echo "[1/6] Installing system dependencies..."

# The update and install steps are deliberately separate commands. When they
# are chained as `update && install`, `set -e` does not react to a failing
# `update` (only the last command of an && list is checked), so the install
# would be skipped silently and the script would carry on.
if ! apt-get update -qq; then
  echo "ERROR: 'apt-get update' failed" >&2
  exit 1
fi

# Installer output stays hidden as before, but a failure is now reported
# instead of ending the script without any message.
if ! apt-get install -y -qq git git-lfs python3-pip > /dev/null 2>&1; then
  echo "ERROR: 'apt-get install' failed for: git git-lfs python3-pip" >&2
  exit 1
fi

# --skip-smudge: register the LFS hooks without auto-downloading LFS objects
# on clone/pull.
if ! git lfs install --skip-smudge > /dev/null 2>&1; then
  echo "ERROR: 'git lfs install --skip-smudge' failed" >&2
  exit 1
fi

# [2/6] Download the training code into WORK_DIR and make it the current
# directory; every later step uses paths relative to it.
echo "[2/6] Cloning training code from $CODE_REPO..."
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"

# Skip the download when both entry-point scripts are already present.
if [[ ! -f "lora_sft_train.py" || ! -f "upload_lora_to_hf.py" ]]; then
  pip install -q huggingface_hub
  # The values reach Python through the environment of this one command and
  # the program text is single-quoted, so nothing is spliced into the source.
  # This keeps the token out of the process argument list (visible in `ps`)
  # and avoids breakage if a value ever contains a quote character.
  CODE_REPO="$CODE_REPO" WORK_DIR="$WORK_DIR" HF_TOKEN="$HF_TOKEN" python3 -c '
import os
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["CODE_REPO"],
    local_dir=os.environ["WORK_DIR"],
    token=os.environ["HF_TOKEN"],
)
print("Code downloaded.")
'
fi

# [3/6] Python packages: torch from the CUDA 12.1 wheel index, then the
# project's requirements.txt.
echo "[3/6] Installing Python dependencies..."

# Best-effort, as before: a failure here does not stop the script. It is now
# reported on stderr instead of being swallowed without a trace.
if ! pip install -q torch --index-url https://download.pytorch.org/whl/cu121 2>/dev/null; then
  echo "WARNING: installing torch from the cu121 index failed; continuing." >&2
fi

# This install is mandatory. pip's stderr is no longer discarded, so when it
# fails (and `set -e` stops the script) the reason is visible.
if ! pip install -q -r requirements.txt; then
  echo "ERROR: 'pip install -r requirements.txt' failed" >&2
  exit 1
fi

# [4/6] Fetch the SFT base checkpoint unless it is already on disk.
# MODEL_REPO is expanded by the shell into the Python source below; the token
# is read by Python from the environment.
echo "[4/6] Downloading SFT base model from $MODEL_REPO..."
python3 -c "
import os
from pathlib import Path
from huggingface_hub import hf_hub_download

download_root = Path('Base/out/input_models/luna_sft_v1')
checkpoint = download_root / 'sft_v1' / 'final' / 'model.pth'

if not checkpoint.exists():
    download_root.mkdir(parents=True, exist_ok=True)
    # The repo-relative filename is recreated under local_dir, which is
    # exactly the checkpoint path tested above.
    hf_hub_download(
        repo_id='${MODEL_REPO}',
        filename='sft_v1/final/model.pth',
        local_dir=str(download_root),
        token=os.environ.get('HF_TOKEN'),
    )
    print('Model downloaded.')
else:
    print(f'Checkpoint already exists: {checkpoint}')
"

# [5/6] Fetch the train/val dataset files, skipping any that already exist.
# DATASET_REPO is expanded by the shell into the Python source below; the
# token is read by Python from the environment.
echo "[5/6] Downloading RAG/MCP dataset from $DATASET_REPO..."
python3 -c "
import os
from pathlib import Path
from huggingface_hub import hf_hub_download

dataset_root = Path('Base/Datasets/rag_mcp_sft')
dataset_root.mkdir(parents=True, exist_ok=True)

for split_file in ('train.json', 'val.json'):
    local_copy = dataset_root / split_file
    if not local_copy.exists():
        hf_hub_download(
            repo_id='${DATASET_REPO}',
            filename=split_file,
            local_dir=str(dataset_root),
            repo_type='dataset',
            token=os.environ.get('HF_TOKEN'),
        )
        print(f'Downloaded: {split_file}')
    else:
        print(f'Already exists: {local_copy}')
"

# [6/6] Show the GPU, then launch LoRA SFT training.
printf '%s\n' \
  "[6/6] Starting LoRA SFT training..." \
  "============================================================"

# Informational only: a missing or failing nvidia-smi must not stop the run.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || :
echo

# Run the trainer with only GPU index 0 exposed through CUDA_VISIBLE_DEVICES.
CUDA_VISIBLE_DEVICES=0 python3 lora_sft_train.py --config rag_mcp_lora_config.yaml

# Post-training summary: where the adapter is, and how to upload it by hand.
printf '%s\n' \
  "============================================================" \
  " Training complete!" \
  " Adapter saved to: Base/out/sft/rag_mcp_lora/final/" \
  " Full run folder : Base/out/sft/rag_mcp_lora/" \
  " To upload it to Hugging Face, run:" \
  " python3 upload_lora_to_hf.py --repo-id ASTERIZER/LUNA-100M --folder Base/out/sft/rag_mcp_lora --path-in-repo rag_mcp_lora"

# Optional automatic upload, enabled with UPLOAD_TO_HF=1.
if [[ "${UPLOAD_TO_HF:-0}" == "1" ]]; then
  echo " UPLOAD_TO_HF=1 detected. Uploading adapter to Hugging Face..."
  if [[ ! -f "upload_lora_to_hf.py" ]]; then
    # Fallback when the helper script is absent: upload the run folder
    # directly through the huggingface_hub API.
    python3 -c "
import os
from pathlib import Path
from huggingface_hub import HfApi

run_dir = Path('Base/out/sft/rag_mcp_lora')
final_dir = run_dir / 'final'

# Refuse to upload when the expected adapter files are not all present.
expected = [final_dir / 'adapter_model.pt', final_dir / 'adapter_bundle.pt']
absent = [str(item) for item in expected if not item.exists()]
if absent:
    raise FileNotFoundError('Missing expected adapter files: ' + ', '.join(absent))

hub = HfApi(token=os.environ['HF_TOKEN'])
hub.create_repo(repo_id='ASTERIZER/LUNA-100M', repo_type='model', exist_ok=True)
hub.upload_folder(
    repo_id='ASTERIZER/LUNA-100M',
    repo_type='model',
    folder_path=str(run_dir),
    path_in_repo='rag_mcp_lora',
)
print('uploaded_lora url=https://huggingface.co/ASTERIZER/LUNA-100M/tree/main/rag_mcp_lora')
"
  else
    # Preferred path: the upload helper that ships with the training code.
    python3 upload_lora_to_hf.py \
      --repo-id ASTERIZER/LUNA-100M \
      --folder Base/out/sft/rag_mcp_lora \
      --path-in-repo rag_mcp_lora
  fi
fi
echo "============================================================"
|