YYYYYYUUU's picture
Backup FULL local core code incl. libs/ CUDA ext + all configs
3499c27 verified
Raw
History Blame Contribute Delete
1.94 kB
#!/bin/sh
# Change to the parent of the directory that contains this script, so the
# relative paths used further down (tools/, configs/, exp/) resolve from there.
# Both command substitutions are quoted so a path containing whitespace is
# passed through as a single word instead of being split.
cd "$(dirname "$(dirname "$0")")" || exit 1

# Defaults; each may be overridden by the command-line options parsed below.
PYTHON=python        # interpreter command used to launch the test entry point
TEST_CODE=test.py    # entry-point file name under tools/
DATASET=scannet      # dataset name, used as a path component
CONFIG="None"        # "None" is a sentinel: use the config stored in the experiment dir
EXP_NAME=debug       # experiment name, used as a path component
WEIGHT=model_best    # checkpoint base name (without the .pth suffix)
NUM_GPU=None         # "None" is a sentinel: auto-detect the GPU count
NUM_MACHINE=1        # number of machines
DIST_URL="auto"      # distributed URL; replaced when running under SLURM
# Parse command-line options, overriding the defaults assigned earlier.
#   -p VALUE  -> PYTHON       (interpreter command)
#   -d VALUE  -> DATASET
#   -c VALUE  -> CONFIG
#   -n VALUE  -> EXP_NAME
#   -w VALUE  -> WEIGHT
#   -g VALUE  -> NUM_GPU
#   -m VALUE  -> NUM_MACHINE
#
# The leading ':' in the optstring puts getopts in silent mode. Without it,
# getopts unsets OPTARG on a bad option, so the message below could never
# name the offending letter. In silent mode OPTARG holds that letter both
# for an unknown option ('?') and for a missing argument (':').
#
# As before, a bad option is only reported (now on stderr) and parsing
# continues with the remaining arguments; it does not abort the script.
parse_args() {
  while getopts ":p:d:c:n:w:g:m:" opt; do
    case $opt in
      p) PYTHON=$OPTARG ;;
      d) DATASET=$OPTARG ;;
      c) CONFIG=$OPTARG ;;
      n) EXP_NAME=$OPTARG ;;
      w) WEIGHT=$OPTARG ;;
      g) NUM_GPU=$OPTARG ;;
      m) NUM_MACHINE=$OPTARG ;;
      :) echo "Option -$OPTARG requires an argument" >&2 ;;
      \?) echo "Invalid option: -$OPTARG" >&2 ;;
    esac
  done
}
parse_args "$@"
# When no GPU count was supplied (NUM_GPU still holds the 'None' sentinel),
# ask the selected interpreter how many CUDA devices torch reports.
if [ "${NUM_GPU}" = 'None' ]; then
  # $PYTHON is left unquoted, as elsewhere in this script, so a multi-word
  # interpreter command is still split into separate words.
  # Abort if the probe fails or prints nothing: continuing would pass an
  # empty value on as --num-gpus.
  if ! NUM_GPU=$($PYTHON -c 'import torch; print(torch.cuda.device_count())') ||
    [ -z "$NUM_GPU" ]; then
    echo "Failed to detect the GPU count with '$PYTHON'; pass it explicitly with -g" >&2
    exit 1
  fi
fi

# Summary of the effective settings.
echo "Experiment name: $EXP_NAME"
echo "Python interpreter dir: $PYTHON"
echo "Dataset: $DATASET"
echo "GPU Num: $NUM_GPU"
echo "Machine Num: $NUM_MACHINE"
# Under SLURM (SLURM_NODELIST is non-empty) build a tcp:// URL pointing at
# the first node of the allocation; otherwise DIST_URL keeps its prior value.
if [ -n "$SLURM_NODELIST" ]; then
  # First host name of the expanded node list.
  MASTER_HOSTNAME=$(scontrol show hostname "$SLURM_NODELIST" | head -n 1)
  # Keep only the first address: getent may print several lines, which would
  # otherwise produce a multi-line URL.
  MASTER_ADDR=$(getent hosts "$MASTER_HOSTNAME" | awk 'NR == 1 { print $1 }')
  # If the lookup returned nothing, fall back to the host name rather than
  # emitting "tcp://:port".
  MASTER_ADDR=${MASTER_ADDR:-$MASTER_HOSTNAME}
  # Port in 10000..29999 derived from the first four hex digits of the MD5 of
  # "DATASET/EXP_NAME", so the same inputs always yield the same port.
  # printf is used because `echo -n` is not defined by POSIX sh.
  PORT_SEED=$(printf '%s' "${DATASET}/${EXP_NAME}" | md5sum | cut -c 1-4)
  MASTER_PORT=$((10000 + 0x${PORT_SEED} % 20000))
  DIST_URL=tcp://${MASTER_ADDR}:${MASTER_PORT}
fi
echo "Dist URL: $DIST_URL"
# Paths derived from the dataset and experiment names.
EXP_DIR=exp/${DATASET}/${EXP_NAME}
MODEL_DIR=${EXP_DIR}/model
CODE_DIR=${EXP_DIR}/code

# Despite its name, CONFIG_DIR is the path of a config *file*.
# With the "None" sentinel the config stored inside the experiment directory
# is used; otherwise the named config under configs/<dataset>/ is used.
if [ "${CONFIG}" = "None" ]; then
  CONFIG_DIR=${EXP_DIR}/config.py
else
  CONFIG_DIR=configs/${DATASET}/${CONFIG}.py
fi
# Quoted so the path is printed verbatim (no word splitting or globbing).
echo "Loading config in: $CONFIG_DIR"
# Run from the working tree: modules are resolved from the current directory.
# Alternative kept for reference (run the copy under the experiment dir):
#   export PYTHONPATH=./$CODE_DIR
#   $PYTHON -u "$CODE_DIR"/tools/$TEST_CODE ...
export PYTHONPATH=./
# NOTE(review): this message prints CODE_DIR, but the command below launches
# tools/$TEST_CODE relative to the current directory - confirm which is meant.
echo "Running code in: $CODE_DIR"
echo " =========> RUN TASK <========="
# Request a higher open-file limit; a failure here is not treated as fatal.
ulimit -n 65536
# $PYTHON stays unquoted so a multi-word interpreter command is split into
# words; every other expansion is quoted so it reaches the program as exactly
# one argument. Machine rank is SLURM_NODEID, or 0 when unset or empty.
$PYTHON -u "tools/$TEST_CODE" \
  --config-file "$CONFIG_DIR" \
  --num-gpus "$NUM_GPU" \
  --num-machines "$NUM_MACHINE" \
  --machine-rank "${SLURM_NODEID:-0}" \
  --dist-url "${DIST_URL}" \
  --options save_path="$EXP_DIR" weight="${MODEL_DIR}/${WEIGHT}.pth"