first_run / first_run_output /manual_launch.sh
vuiseng9's picture
WIP
a8b07e8
# Manual (non-Slurm) launch: export the launch variables, then pull the
# benchmark config and run_and_time.sh into this same shell.
set -x  # echo every command while the launch sequence runs

# LOGDIR           - the place where the output logs will be saved.
# LOCAL_WORLD_SIZE - 0 selects non-slurm mode, i.e. torchrun is used.
export LOGDIR=/mlperf_logdir LOCAL_WORLD_SIZE=0

# A launch timestamp serves as the unique local ID for each run.
export SLURM_JOBID="$(date '+%y-%m-%d__%H-%M-%S')"

# DO NOT SET DATA_ROOT & CKPT_ROOT
# Both files are sourced (not executed), so they run in the current shell.
source config_DGXB200_1x8x1xtp1pp1cp1.sh
source run_and_time.sh

set +x  # stop echoing commands