#!/bin/bash
#
# Launch the 4-task Atlas training run (train_atlas.py) on 4 GPUs through the
# DeepSpeed launcher bundled with the project-local environment. The job is
# started detached (setsid + nohup) so it keeps running after this shell exits.
#
# Layout assumption: this script lives one directory below the project root;
# every relative path below is resolved against that root.
#
# Environment:
#   RESUME_CKPT  Optional. When non-empty, it is forwarded to train_atlas.py
#                as `--resume <value>` (passed as a single argument, so paths
#                containing spaces are safe).
#
# Output:
#   work_dirs/atlas_4task_coordfix/nohup.out  combined stdout/stderr of the
#                                             run (overwritten on each launch)
set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname -- "$0")/.." && pwd)"
cd "$PROJECT_ROOT"
ENV_ROOT="${PROJECT_ROOT}/envs/streampetr"
DEEPSPEED_BIN="${ENV_ROOT}/bin/deepspeed"

# Fail early with a clear message instead of a detached job dying silently.
if [[ ! -x "$DEEPSPEED_BIN" ]]; then
  echo "ERROR: deepspeed not found at ${DEEPSPEED_BIN}" >&2
  exit 1
fi

# Put the project environment first so its binaries and libraries win.
export PATH="${ENV_ROOT}/bin:${PATH}"
# Append the previous value only when there is one: a trailing ':' would add
# an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="${ENV_ROOT}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

OUTPUT_DIR="work_dirs/atlas_4task_coordfix"
LOG_FILE="$OUTPUT_DIR/nohup.out"
mkdir -p "$OUTPUT_DIR"

# Training sets, one per task; handed to train_atlas.py as a single
# comma-separated --data_json value.
DATA_FILES=(
  data/atlas_nuscenes_train.json
  data/openlane_subsetB_lane_train_4pt_aligned.json
  data/atlas_planning_train_uniad_command.json
  data/atlas_caption_train_canonical.json
)
DATA_JSON="$(IFS=,; echo "${DATA_FILES[*]}")"

# Arguments are collected in an array so every value reaches the trainer as
# exactly one word, regardless of whitespace or glob characters.
TRAIN_ARGS=(
  --llm_model pretrained/vicuna-7b-v1.5
  --data_json "$DATA_JSON"
  --data_root /home/guoyuanbo/autodl-tmp/data/nuscenes
  --visual_token_mode online
  --task_balance_mode none
  --planning_table3_mode atlas_high_level
  --streampetr_config configs/streampetr_atlas_aligned.py
  --streampetr_ckpt pretrained/streampetr/streampetr_eva02_ep24.pth
  --topomlp_config configs/topomlp_atlas_aligned.py
  --topomlp_ckpt work_dirs/topomlp_atlas_aligned/epoch_24.pth
  --deepspeed configs/ds_zero2.json
  --output_dir "$OUTPUT_DIR"
  --epochs 10
  --lr 2e-5
  --weight_decay 1e-4
  --batch_size 1
  --gradient_accumulation_steps 2
  --warmup_ratio 0.03
  --max_grad_norm 1.0
  --log_steps 100
  --save_epochs 1
  --keep_last_n_ckpts 0
  --seed 42
  --num_workers 4
)

# Optional resume checkpoint supplied by the caller's environment.
if [[ -n "${RESUME_CKPT:-}" ]]; then
  TRAIN_ARGS+=(--resume "$RESUME_CKPT")
fi

# setsid detaches the job from this terminal's session and nohup ignores
# SIGHUP, so the run survives logout. All output goes to the log file.
setsid nohup "$DEEPSPEED_BIN" --num_gpus 4 train_atlas.py "${TRAIN_ARGS[@]}" \
  > "$LOG_FILE" 2>&1 &
# Capture the PID right away, before any other command can change $!.
TRAIN_PID=$!

echo "Training launched in background (setsid + nohup)."
echo "PID: $TRAIN_PID"
echo "Log: $OUTPUT_DIR/nohup.out"
echo ""
echo "Monitor: tail -f $OUTPUT_DIR/nohup.out"
echo "Check GPU: nvidia-smi"