File size: 1,847 Bytes
#!/bin/bash
# Launches train_atlas.py under DeepSpeed on 4 GPUs as a detached background
# job (setsid + nohup) and prints where to find its PID and log.
#
# Environment:
#   RESUME_CKPT  (optional) checkpoint path; when non-empty it is forwarded to
#                the trainer as `--resume <path>`.
#
# Exits 1 if the deepspeed executable is missing from the project's bundled
# environment (envs/streampetr, relative to the project root).
set -euo pipefail

# The project root is the parent of the directory containing this script;
# all relative paths below are resolved against it.
PROJECT_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
cd -- "$PROJECT_ROOT"

ENV_ROOT="${PROJECT_ROOT}/envs/streampetr"
DEEPSPEED_BIN="${ENV_ROOT}/bin/deepspeed"
if [[ ! -x "$DEEPSPEED_BIN" ]]; then
  echo "ERROR: deepspeed not found at ${DEEPSPEED_BIN}" >&2
  exit 1
fi

export PATH="${ENV_ROOT}/bin:${PATH}"
# Append the previous value only when it is non-empty: a trailing ':' would
# add an empty entry, which the dynamic loader interprets as the current
# working directory.
export LD_LIBRARY_PATH="${ENV_ROOT}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

OUTPUT_DIR="work_dirs/atlas_4task_coordfix"
mkdir -p -- "$OUTPUT_DIR"

# Optional arguments are kept in an array so a checkpoint path containing
# spaces or glob characters reaches the trainer as a single argument.
EXTRA_ARGS=()
if [[ -n "${RESUME_CKPT:-}" ]]; then
  EXTRA_ARGS=(--resume "$RESUME_CKPT")
fi

# setsid puts the job in its own session and nohup makes it ignore SIGHUP, so
# training keeps running after the launching terminal closes. stdout and
# stderr both go to nohup.out inside the output directory.
# The ${arr[@]+...} form expands to nothing for an empty array without
# tripping `set -u` on older bash versions.
setsid nohup "$DEEPSPEED_BIN" --num_gpus 4 train_atlas.py \
  --llm_model pretrained/vicuna-7b-v1.5 \
  --data_json data/atlas_nuscenes_train.json,data/openlane_subsetB_lane_train_4pt_aligned.json,data/atlas_planning_train_uniad_command.json,data/atlas_caption_train_canonical.json \
  --data_root /home/guoyuanbo/autodl-tmp/data/nuscenes \
  --visual_token_mode online \
  --task_balance_mode none \
  --planning_table3_mode atlas_high_level \
  --streampetr_config configs/streampetr_atlas_aligned.py \
  --streampetr_ckpt pretrained/streampetr/streampetr_eva02_ep24.pth \
  --topomlp_config configs/topomlp_atlas_aligned.py \
  --topomlp_ckpt work_dirs/topomlp_atlas_aligned/epoch_24.pth \
  --deepspeed configs/ds_zero2.json \
  --output_dir "$OUTPUT_DIR" \
  --epochs 10 \
  --lr 2e-5 \
  --weight_decay 1e-4 \
  --batch_size 1 \
  --gradient_accumulation_steps 2 \
  --warmup_ratio 0.03 \
  --max_grad_norm 1.0 \
  --log_steps 100 \
  --save_epochs 1 \
  --keep_last_n_ckpts 0 \
  --seed 42 \
  --num_workers 4 \
  ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} > "$OUTPUT_DIR/nohup.out" 2>&1 &
# Capture the PID immediately so later statements cannot disturb $!.
TRAIN_PID=$!

echo "Training launched in background (setsid + nohup)."
echo "PID: $TRAIN_PID"
echo "Log: $OUTPUT_DIR/nohup.out"
echo ""
echo "Monitor: tail -f $OUTPUT_DIR/nohup.out"
echo "Check GPU: nvidia-smi"
|