#!/usr/bin/env bash
#
# Launch train_ds.py through the DeepSpeed launcher and mirror all output
# (stdout and stderr) into logs/<exp>/train.log under the repository root.
#
# The repository root is taken to be the parent of the directory that
# contains this script; the script changes into it before launching.
#
# Options (each takes exactly one value):
#   --gpus <ids>            GPU list for --include=localhost:<ids> (default: 0)
#   --base-model <path>     passed as --version
#                           (default: checkpoints/llava-v1.6-vicuna-7b)
#   --data <dir>            passed as --dataset_dir (default: data/DRSeg)
#   --clip <path>           passed as --vision-tower
#                           (default: checkpoints/clip-vit-large-patch14)
#   --exp <name>            experiment name / log sub-directory
#                           (default: pixdlm_train)
#   --epochs <n>            default: $EPOCHS or 10
#   --steps-per-epoch <n>   default: $STEPS_PER_EPOCH or 200
#   --port <n>              DeepSpeed master port; default: $PORT or 29511
#   --precision <p>         default: $PRECISION or bf16
#
# Exit status: 2 on a usage error (unknown option or missing option value);
# otherwise the status of the deepspeed | tee pipeline (pipefail is on).

set -euo pipefail

GPUS="0"
BASE_MODEL="checkpoints/llava-v1.6-vicuna-7b"
DATA="data/DRSeg"
CLIP="checkpoints/clip-vit-large-patch14"
EXP="pixdlm_train"
# These four may be pre-set through the environment; flags override them.
PORT="${PORT:-29511}"
EPOCHS="${EPOCHS:-10}"
STEPS_PER_EPOCH="${STEPS_PER_EPOCH:-200}"
PRECISION="${PRECISION:-bf16}"

#######################################
# Abort with a usage error when an option has no value following it.
# Without this check, `set -u` would kill the script with an opaque
# "$2: unbound variable" message.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
# Outputs:   error message on stderr
# Returns:   0 if a value is present; exits 2 otherwise
#######################################
require_value() {
  if (( $2 < 2 )); then
    printf 'Missing value for argument: %s\n' "$1" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --gpus)
      require_value "$1" "$#"; GPUS="$2"; shift 2 ;;
    --base-model)
      require_value "$1" "$#"; BASE_MODEL="$2"; shift 2 ;;
    --data)
      require_value "$1" "$#"; DATA="$2"; shift 2 ;;
    --clip)
      require_value "$1" "$#"; CLIP="$2"; shift 2 ;;
    --exp)
      require_value "$1" "$#"; EXP="$2"; shift 2 ;;
    --epochs)
      require_value "$1" "$#"; EPOCHS="$2"; shift 2 ;;
    --steps-per-epoch)
      require_value "$1" "$#"; STEPS_PER_EPOCH="$2"; shift 2 ;;
    --port)
      require_value "$1" "$#"; PORT="$2"; shift 2 ;;
    --precision)
      require_value "$1" "$#"; PRECISION="$2"; shift 2 ;;
    *)
      printf 'Unknown argument: %s\n' "$1" >&2
      exit 2
      ;;
  esac
done

# Resolve the repository root (one level above this script) and work from it,
# so the relative default paths above and the log directory resolve there.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT"

export PYTHONPATH="$ROOT:${PYTHONPATH:-}"
export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_VERBOSITY=error

log_dir="logs/$EXP"
mkdir -p "$log_dir"

# Arguments for the deepspeed launcher itself.
launcher_args=(
  --master_port="$PORT"
  --include="localhost:$GPUS"
)

# Arguments forwarded to train_ds.py. Kept in an array so that values
# containing spaces survive intact and no line continuations are needed.
train_args=(
  --epochs="$EPOCHS"
  --steps_per_epoch="$STEPS_PER_EPOCH"
  --version="$BASE_MODEL"
  --dataset_dir="$DATA"
  --dataset="custom_seg"
  --sample_rates="1"
  --exp_name="$EXP"
  --log_base_dir="$ROOT/logs"
  --val_dataset="custom_seg|val"
  --train_mask_decoder
  --Three_Level_Multi_Scale_Decoder
  --vision-tower="$CLIP"
  --seg_token_num=3
  --num_classes_per_question=3
  --batch_size=1
  --grad_accumulation_steps=1
  --val_batch_size=1
  --preprocessor_config="$ROOT/configs/preprocessor_448.json"
  --resize_vision_tower
  --resize_vision_tower_size=448
  --vision_tower_for_mask
  --use_expand_question_list
  --image_feature_scale_num=3
  --conv_type="llava_v1"
  --is_multipath_encoder
  --precision="$PRECISION"
)

# pipefail (set above) makes a deepspeed failure propagate through tee.
deepspeed "${launcher_args[@]}" train_ds.py "${train_args[@]}" 2>&1 \
  | tee "$log_dir/train.log"