#!/bin/bash
#
# Launch single-node fine-tuning of Hiera_MAE through torchrun.
#
# Usage:
#   ./this_script.sh                          # start a fresh run
#   RESUME_CHECKPOINT=/path/to/ckpt ./this_script.sh   # resume a run
#
# Environment:
#   RESUME_CHECKPOINT  Optional. When set and non-empty, its value is
#                      forwarded to finetune.py as `--resume <value>`.
#
# Exit status:
#   Non-zero if torchrun (or any other command) fails; the completion
#   message is printed only after torchrun returns successfully.

set -euo pipefail

# Expose only GPU index 3 to CUDA for this job.
export CUDA_VISIBLE_DEVICES=3
# Limit OpenMP / MKL to one thread per process.
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1

PROJECT_ROOT="/vePFS-0x0d/home/yewh/Hiera_MAE"
TRAIN_SCRIPT="${PROJECT_ROOT}/finetune.py"
CONFIG_FILE="${PROJECT_ROOT}/configs/finetune_config.yaml"
OUTPUT_DIR="${PROJECT_ROOT}/output/downstream/nki/age-lp3"

NUM_GPUS=1
# NOTE(review): torchrun documents --master_port as used for static
# rendezvous only; together with --standalone it may be ignored. It is
# kept so the command line stays the same as before — confirm whether
# it is still needed.
MASTER_PORT=29503

echo "Starting DDP fine-tuning with $NUM_GPUS GPUs..."
echo "Config: $CONFIG_FILE"
echo "Output directory: $OUTPUT_DIR"

# Build the command line once as an array so every argument stays a
# single word regardless of spaces or glob characters in the values.
torchrun_args=(
  --standalone
  --nnodes=1
  --nproc_per_node="$NUM_GPUS"
  --master_port="$MASTER_PORT"
  "$TRAIN_SCRIPT"
  --config "$CONFIG_FILE"
  --output_dir "$OUTPUT_DIR"
)

# RESUME_CHECKPOINT is not assigned in this script; it is taken from the
# caller's environment. The `:-` default keeps `set -u` from aborting
# when it is unset.
if [[ -n "${RESUME_CHECKPOINT:-}" ]]; then
  torchrun_args+=(--resume "$RESUME_CHECKPOINT")
fi

# Under `set -e` a failing torchrun aborts the script here, so the
# message below is only reached on success.
torchrun "${torchrun_args[@]}"

echo "Fine-tuning completed!"