#!/usr/bin/env bash
# Run run.py for one model/dataset pair inside the "vlmeval" conda environment.
#
# Usage: <script> MODEL_NAME DATA_NAME [NUM_PROC] [PORT]
#   MODEL_NAME  value passed to run.py --model
#   DATA_NAME   value passed to run.py --data
#   NUM_PROC    value for torchrun --nproc_per_node (default: 4)
#   PORT        port of the torchrun rendezvous endpoint (default: 9898)
#
# Output directory: outputs/MODEL_NAME/DATA_NAME/<timestamp>

if [[ $# -lt 2 ]]; then
  printf 'Usage: %s MODEL_NAME DATA_NAME [NUM_PROC] [PORT]\n' "${0##*/}" >&2
  exit 2
fi

MODEL_NAME=$1
DATA_NAME=$2
# ':-' supplies a default when the argument is missing or empty. (A bare ':4'
# would be a substring offset and silently yield 0 for short arguments.)
NUM_PROC=$(( ${3:-4} ))
PORT=$(( ${4:-9898} ))

# Timestamp (year, month, day, hour, minute, second) keeps each run's
# work directory distinct.
DATETIME=$(date +'%Y-%m-%d_%H-%M-%S')
WORK_DIR="outputs/$MODEL_NAME/$DATA_NAME/$DATETIME"
printf '%s\n' "$WORK_DIR"

# Load conda's shell functions so that `conda activate` works in a
# non-interactive script.
eval "$(conda shell.bash hook)"
conda activate vlmeval || {
  printf 'error: could not activate conda environment "vlmeval"\n' >&2
  exit 1
}

# Arguments shared by both launch modes.
run_args=(
  run.py --verbose
  --data "$DATA_NAME"
  --model "$MODEL_NAME"
  --work-dir "$WORK_DIR"
)

# NOTE(review): torchrun is selected only when NUM_PROC is exactly 1, and any
# other value falls through to a plain `python` launch that ignores NUM_PROC
# and PORT. This looks inverted (perhaps `NUM_PROC > 1` was intended) --
# confirm before changing; the original branch logic is preserved here.
if (( NUM_PROC == 1 )); then
  torchrun \
    --nproc_per_node="$NUM_PROC" \
    --rdzv_endpoint="localhost:$PORT" \
    --rdzv_id=4 --rdzv_backend=c10d --nnodes=1 \
    "${run_args[@]}"
else
  python "${run_args[@]}"
fi

# Disabled snippets kept for reference:
# --reuse
# cp ./cuda-11.8/bin ./cuda/bin