#!/usr/bin/env bash
#
# Run run.py for one model/dataset pair inside the `vlmeval` conda environment.
#
# Usage: bash <this-script> MODEL_NAME DATA_NAME [NUM_PROC] [PORT]
#   MODEL_NAME  forwarded to run.py as --model (required)
#   DATA_NAME   forwarded to run.py as --data (required)
#   NUM_PROC    process count given to torchrun --nproc_per_node (default: 4)
#   PORT        port used in torchrun --rdzv_endpoint (default: 9898)
#
# The work directory outputs/MODEL_NAME/DATA_NAME/<timestamp> is printed on
# stdout and forwarded to run.py as --work-dir.
#
# The exit status is that of the launched torchrun/python command, 1 on a
# setup error, or 2 on a usage error.
#
# Strict mode (set -e / set -u) is intentionally not enabled globally: the
# conda hook and activation code are eval'ed in this shell, so the critical
# steps are checked explicitly instead.

# Print an error message on stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis on stderr and abort with status 2.
usage() {
  printf 'Usage: %s MODEL_NAME DATA_NAME [NUM_PROC] [PORT]\n' "${0##*/}" >&2
  exit 2
}

(( $# >= 2 )) || usage
[[ -n "$1" && -n "$2" ]] || usage

MODEL_NAME=$1
DATA_NAME=$2
# `${3:-4}` supplies the default; the previous `${3:4}` was a substring
# expansion (offset 4 of $3) and evaluated to 0 for ordinary inputs.
NUM_PROC=${3:-4}
PORT=${4:-9898}

[[ "$NUM_PROC" =~ ^[0-9]+$ ]] \
  || die "NUM_PROC must be a positive integer, got '$NUM_PROC'"
[[ "$PORT" =~ ^[0-9]+$ ]] \
  || die "PORT must be a non-negative integer, got '$PORT'"

# Force base 10 so values with leading zeros are not parsed as octal.
NUM_PROC=$(( 10#$NUM_PROC ))
PORT=$(( 10#$PORT ))
(( NUM_PROC >= 1 )) || die "NUM_PROC must be at least 1"

# Timestamp (year, month, day, hour, minute, second) keeps each run's
# work directory separate.
DATETIME=$(date +'%Y-%m-%d_%H-%M-%S')
WORK_DIR="outputs/$MODEL_NAME/$DATA_NAME/$DATETIME"
printf '%s\n' "$WORK_DIR"

# Load conda's shell integration into this (non-interactive) shell so that
# `conda activate` is available, then switch to the evaluation environment.
conda_hook=$(conda shell.bash hook) \
  || die "could not load the conda shell hook (is conda on PATH?)"
eval "$conda_hook"
conda activate vlmeval \
  || die "could not activate conda environment 'vlmeval'"

# Arguments shared by both launch modes; an array keeps values containing
# spaces or glob characters intact.
run_args=(
  run.py --verbose
  --data "$DATA_NAME"
  --model "$MODEL_NAME"
  --work-dir "$WORK_DIR"
)

# NOTE(review): the original test was `-eq 1`, which sent only the
# single-process case through torchrun and every multi-process request
# through plain python. That looks inverted, so torchrun is now used
# whenever more than one process is requested.
if (( NUM_PROC > 1 )); then
  torchrun \
    --nproc_per_node="$NUM_PROC" \
    --rdzv_endpoint="localhost:$PORT" \
    --rdzv_id=4 --rdzv_backend=c10d --nnodes=1 \
    "${run_args[@]}"
else
  python "${run_args[@]}"
fi

# Leftover notes from the original script (purpose not shown here; kept
# for reference, not executed):
# --reuse
# cp ./cuda-11.8/bin ./cuda/bin