#!/usr/bin/env bash
#
# Distributed training launcher built on `python -m torch.distributed.launch`.
#
# Usage: <this script> CFG GPUS [CFG_OPTION...]
#   CFG         path to the training config file (forwarded to train.py)
#   GPUS        total number of processes requested; at most 8 are started
#               per node, the rest is spread over additional nodes
#   CFG_OPTION  optional extra values forwarded after --cfg-options
#
# Optional environment variables:
#   MASTER_PORT  port handed to --master_port   (default: 28567)
#   MASTER_ADDR  address handed to --master_addr (default: 127.0.0.1)
#   RANK         value handed to --node_rank     (default: 0)

# Fail early with a readable message instead of a cryptic arithmetic error.
if (( $# < 2 )); then
  printf 'Usage: %s CFG GPUS [CFG_OPTION...]\n' "${0##*/}" >&2
  exit 2
fi

# Timestamp (month, day, hour, minute) used as the suffix of the log file.
T=$(date +%m%d%H%M)

CFG=$1
GPUS=$2

# GPUS feeds integer arithmetic below; reject anything but a positive integer
# (0 would otherwise end in a division by zero).
if ! [[ "${GPUS}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: GPUS must be a positive integer, got "%s"\n' "${GPUS}" >&2
  exit 2
fi

# Cap the per-node process count at 8 and derive the node count from it.
GPUS_PER_NODE=$(( GPUS < 8 ? GPUS : 8 ))
NNODES=$(( GPUS / GPUS_PER_NODE ))

# The division truncates: e.g. GPUS=12 yields NNODES=1 with 8 processes.
# Keep that behavior, but make it visible instead of silent.
if (( GPUS % GPUS_PER_NODE != 0 )); then
  printf 'warning: GPUS=%s is not a multiple of %s; using NNODES=%s x GPUS_PER_NODE=%s = %s processes\n' \
    "${GPUS}" "${GPUS_PER_NODE}" "${NNODES}" "${GPUS_PER_NODE}" \
    "$(( NNODES * GPUS_PER_NODE ))" >&2
fi

# Rendezvous settings; each can be overridden from the environment.
MASTER_PORT=${MASTER_PORT:-28567}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
RANK=${RANK:-0}

# Derive the working directory from the config path: drop the file extension
# and replace every occurrence of "configs" with "work_dirs", e.g.
#   configs/foo/bar.py -> work_dirs/foo/bar/
# The trailing slash is part of the value; later uses append to it directly.

# An empty CFG would make WORK_DIR "/" and create /logs at the filesystem
# root, so stop here instead.
: "${CFG:?CFG (config path) must be set before WORK_DIR can be derived}"

# Strip the extension only when the file name itself contains a dot;
# a plain ${CFG%.*} would otherwise cut at a dot in the directory part
# (e.g. "./configs/foo" would collapse to an empty string).
cfg_stem=${CFG}
case "${CFG##*/}" in
  *.*) cfg_stem=${CFG%.*} ;;
esac
WORK_DIR="${cfg_stem//configs/work_dirs}/"

# mkdir -p is a no-op when the directory already exists, so no -d test is
# needed; a failure here would otherwise only surface later when tee cannot
# open the log file.
if ! mkdir -p -- "${WORK_DIR}logs"; then
  printf 'error: cannot create log directory %s\n' "${WORK_DIR}logs" >&2
  exit 1
fi

# Extend the Python module search path. Resulting order:
#   <script dir>/.. : .../paradrive/external : <previous PYTHONPATH> :
#   .../paradrive/external/toolbox : .../AlgEngine/navsim
# NOTE(review): the absolute /cpfs04 and /cpfs01 paths are specific to one
# cluster and user; confirm they exist on the machine running this script.
PYTHONPATH="/cpfs04/user/litianyu/projects/paradrive/external:${PYTHONPATH}"
PYTHONPATH="${PYTHONPATH}:/cpfs04/user/litianyu/projects/paradrive/external/toolbox"
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}"
PYTHONPATH="${PYTHONPATH}:/cpfs01/shared/opendrivelab/sii/wangcaojun/repo-wcj/AlgEngine/navsim"

# Without export the assignments above stay shell-local whenever PYTHONPATH
# was not already present in the environment, and the python process started
# by this script would never see them.
export PYTHONPATH

# Print the resolved run settings so they show up at the top of the console
# output. Values are passed as quoted printf arguments so that whitespace or
# glob characters in them are printed verbatim.
printf 'WORK_DIR:  %s\n' "${WORK_DIR}"
printf 'GPUS_PER_NODE:  %s\n' "${GPUS_PER_NODE}"
printf 'NNODES:  %s\n' "${NNODES}"
printf 'RANK:  %s\n' "${RANK}"

# Start train.py (expected next to this script) through
# torch.distributed.launch and mirror its combined stdout/stderr into
# ${WORK_DIR}logs/train.<timestamp>.
# Reads CFG, GPUS_PER_NODE, NNODES, MASTER_ADDR, MASTER_PORT, RANK, WORK_DIR
# and T, which are set earlier in this script.

# Everything after CFG and GPUS on the command line is forwarded verbatim
# (one word per original argument) as --cfg-options values.
cfg_options=("${@:3}")

# Build the command as an array so every argument survives as a single word.
launch_cmd=(
  python -m torch.distributed.launch
  --nproc_per_node="${GPUS_PER_NODE}"
  --master_addr="${MASTER_ADDR}"
  --master_port="${MASTER_PORT}"
  --nnodes="${NNODES}"
  --node_rank="${RANK}"
  "$(dirname "$0")/train.py"
  "${CFG}"
  --launcher pytorch
  --deterministic
  --work-dir "${WORK_DIR}"
)

# Only add --cfg-options when there is something to pass. train.py's argument
# declaration is not visible here; if it uses argparse nargs='+', a bare
# --cfg-options would be rejected -- TODO confirm against train.py.
if (( ${#cfg_options[@]} > 0 )); then
  launch_cmd+=(--cfg-options "${cfg_options[@]}")
fi

# With pipefail the pipeline reports a failing python process instead of the
# (almost always zero) exit status of tee.
set -o pipefail
"${launch_cmd[@]}" 2>&1 | tee "${WORK_DIR}logs/train.${T}"