| #!/usr/bin/env bash |
| set -euo pipefail |
|
|
| |
| |
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Configuration: every value below can be overridden from the environment.
# An empty exported value is treated the same as unset.
# ---------------------------------------------------------------------------

# Model checkpoints passed to the trainer.
MANTHAN_MODEL="${MANTHAN_MODEL:-zyxcisss/Manthan-T1}"
TEXT_MODEL="${TEXT_MODEL:-Qwen/Qwen3-0.6B-Base}"

# Hugging Face dataset IDs for each training stage.
STAGE1_DS="${STAGE1_DS:-liuhaotian/LLaVA-CC3M-Pretrain-595K}"
STAGE2_DS="${STAGE2_DS:-liuhaotian/LLaVA-Instruct-150K}"

# Output locations on Kaggle's writable disk; stage dirs derive from OUT_BASE.
OUT_BASE="${OUT_BASE:-/kaggle/working/manthan_runs}"
STAGE1_OUT="${STAGE1_OUT:-${OUT_BASE}/stage1}"
STAGE2_OUT="${STAGE2_OUT:-${OUT_BASE}/stage2}"

# Core hyperparameters forwarded to the training script.
MAX_LENGTH="${MAX_LENGTH:-2048}"
IMAGE_SIZE="${IMAGE_SIZE:-384}"
BATCH_SIZE="${BATCH_SIZE:-1}"
GRAD_ACCUM="${GRAD_ACCUM:-32}"
LR="${LR:-1e-4}"
EPOCHS_STAGE1="${EPOCHS_STAGE1:-1}"
EPOCHS_STAGE2="${EPOCHS_STAGE2:-1}"

# Per-stage sample caps, passed as --limit when non-empty ("" disables the cap).
LIMIT_STAGE1="${LIMIT_STAGE1:-20000}"
LIMIT_STAGE2="${LIMIT_STAGE2:-150000}"

# When "1", the trainer is invoked with --use_lora.
USE_LORA="${USE_LORA:-1}"

# When "1", install the HF hub client at the end so artifacts can be uploaded.
UPLOAD="${UPLOAD:-0}"
|
|
| |
| |
| |
# Report GPU availability; warn (but do not abort) when no CUDA tooling is
# visible, since the rest of the setup can still proceed.
if ! command -v nvidia-smi >/dev/null 2>&1; then
  echo "WARNING: nvidia-smi not found. This script expects a CUDA runtime (Kaggle)."
else
  echo "GPU found:"
  nvidia-smi || true  # informational only; never fail the run on this
fi
|
|
| |
# Keep all Hugging Face caches on Kaggle's writable scratch disk unless the
# caller already pointed them elsewhere.
hf_root="/kaggle/working/hf"
export HF_HOME="${HF_HOME:-${hf_root}}"
export TRANSFORMERS_CACHE="${TRANSFORMERS_CACHE:-${hf_root}/transformers}"
export HF_DATASETS_CACHE="${HF_DATASETS_CACHE:-${hf_root}/datasets}"

# Make sure cache and output directories exist before anything downloads.
mkdir -p "${HF_HOME}" "${TRANSFORMERS_CACHE}" "${HF_DATASETS_CACHE}" "${OUT_BASE}"
|
|
| |
| |
| |
# Print the interpreter version up front so failed runs are easy to diagnose
# from the Kaggle log alone.
python - <<'PY'
import sys
print("python:", sys.version)
PY


# Upgrade pip first, then install the training stack with the upgraded pip.
python -m pip install -U pip
python -m pip install -U "transformers>=4.45" accelerate datasets peft


# unsloth is optional; `|| true` keeps the script alive under `set -e` when
# no compatible wheel exists for this environment.
python -m pip install -U unsloth || true
|
|
| |
| |
| |
#######################################
# Emit "--limit <value>" when a non-empty limit is given; emit nothing
# otherwise. Intended to be spliced unquoted into a trainer command line.
# Arguments: $1 - sample limit (may be empty)
# Outputs:   optional flag pair on stdout
# Returns:   0 always
#######################################
maybe_limit_args() {
  local cap="$1"
  if [[ -n "${cap}" ]]; then
    printf '%s %s\n' "--limit" "${cap}"
  fi
}
|
|
#######################################
# Emit "--use_lora" when LoRA is enabled; emit nothing otherwise.
# Fix: the original `else` branch ran `echo ""`, printing a spurious newline
# that would become an empty positional argument if the call site ever quoted
# the substitution. Emitting nothing is correct in both quoted and unquoted
# contexts.
# Globals:   USE_LORA (read; "1" enables the flag)
# Outputs:   optional flag on stdout
# Returns:   0 always
#######################################
maybe_lora_args() {
  if [[ "${USE_LORA}" == "1" ]]; then
    echo "--use_lora"
  fi
}
|
|
| |
| |
| |
echo "==== Stage 1: projector alignment/pretrain ===="
# Build the trainer's flags in an array: appending optional flags to an array
# is word-splitting-safe and keeps every value quoted, unlike splicing an
# unquoted $(...) substitution into the command line (ShellCheck SC2046).
stage1_args=(
  --stage stage1
  --manthan_model "${MANTHAN_MODEL}"
  --text_model "${TEXT_MODEL}"
  --dataset "${STAGE1_DS}"
  --output_dir "${STAGE1_OUT}"
  --max_length "${MAX_LENGTH}"
  --image_size "${IMAGE_SIZE}"
  --batch_size "${BATCH_SIZE}"
  --grad_accum "${GRAD_ACCUM}"
  --lr "${LR}"
  --epochs "${EPOCHS_STAGE1}"
)
# Optional flags: LoRA adapters and a sample cap (empty limit = no cap).
if [[ "${USE_LORA}" == "1" ]]; then
  stage1_args+=(--use_lora)
fi
if [[ -n "${LIMIT_STAGE1}" ]]; then
  stage1_args+=(--limit "${LIMIT_STAGE1}")
fi
python scripts/train_unsloth_kaggle.py "${stage1_args[@]}"
|
|
| |
| |
| |
echo "==== Stage 2: instruction finetune ===="
# Build the trainer's flags in an array: appending optional flags to an array
# is word-splitting-safe and keeps every value quoted, unlike splicing an
# unquoted $(...) substitution into the command line (ShellCheck SC2046).
stage2_args=(
  --stage stage2
  --manthan_model "${MANTHAN_MODEL}"
  --text_model "${TEXT_MODEL}"
  --dataset "${STAGE2_DS}"
  --output_dir "${STAGE2_OUT}"
  --max_length "${MAX_LENGTH}"
  --image_size "${IMAGE_SIZE}"
  --batch_size "${BATCH_SIZE}"
  --grad_accum "${GRAD_ACCUM}"
  --lr "${LR}"
  --epochs "${EPOCHS_STAGE2}"
)
# Optional flags: LoRA adapters and a sample cap (empty limit = no cap).
if [[ "${USE_LORA}" == "1" ]]; then
  stage2_args+=(--use_lora)
fi
if [[ -n "${LIMIT_STAGE2}" ]]; then
  stage2_args+=(--limit "${LIMIT_STAGE2}")
fi
python scripts/train_unsloth_kaggle.py "${stage2_args[@]}"
|
|
# Summarize where each stage left its artifacts.
printf '%s\n' \
  "==== Done ====" \
  "Stage1 outputs: ${STAGE1_OUT}" \
  "Stage2 outputs: ${STAGE2_OUT}"
|
|
| |
| |
| |
# Optional upload step: only installs the hub client and prints guidance; the
# actual upload is left to the user's own workflow (needs HF credentials).
case "${UPLOAD}" in
  1)
    echo "UPLOAD=1: attempting to upload artifacts (requires HF auth)."
    python -m pip install -U huggingface_hub
    echo "You can now upload ${OUT_BASE} with your preferred workflow."
    ;;
esac
|
|