Spaces:

Lingteng
/

LHMPP

Running on Zero

Lingteng Qiu (邱陵腾）

rm assets & wheels

434b0b0 3 days ago

1.57 kB

	#!/bin/bash
	# Run script: launch training for one method on the given GPUs.
	#
	# Positional arguments:
	#   $1  method  - run name; checkpoints are written under ckpt/<method>
	#   $2  devices - comma-separated GPU ids, e.g. "0" or "0,1,2,3"
	#
	# Settings of training & test for different tasks.
	method="$1"

	# The active task is defined in config.py; query it instead of duplicating
	# the value here.
	task=$(python3 config.py --print_task)

	# Per-task schedule. Only `epochs` is consumed by the train commands in this
	# script; `val_last` and `step` are set alongside it but are not referenced
	# by any command shown in this script.
	case "${task}" in
	'DIS5K') epochs=200 val_last=50 step=5 ;;
	'COD') epochs=150 val_last=50 step=5 ;;
	'HRSOD') epochs=150 val_last=50 step=5 ;;
	'General') epochs=250 val_last=50 step=2 ;;
	'General-2K') epochs=250 val_last=30 step=2 ;;
	'Matting') epochs=100 val_last=30 step=2 ;;
	*)
		# Without this arm an unknown (or empty, if config.py failed) task left
		# `epochs` unset and training was launched with a bare `--epochs`.
		echo "Unknown task '${task}' reported by config.py; cannot pick a schedule." >&2
		exit 1
		;;
	esac
	testsets=NO # Non-existing folder to skip.
	# testsets=TE-COD10K # for COD

	# Train
	devices=$2

	# Print how many GPU ids a comma-separated list ($1) contains.
	# A single trailing comma is ignored, so "0,1," counts as 2. An empty list
	# counts as 1, which keeps the single-process path for a missing argument.
	count_devices() {
		local ids=${1%,}
		# Delete every character that is not a comma; what remains is one
		# comma per separator.
		local commas=${ids//[^,]/}
		echo $(( ${#commas} + 1 ))
	}

	# Number of worker processes to start: one per listed GPU.
	nproc_per_node=$(count_devices "${devices}")

	# The literal strings "True"/"False" are forwarded to train.py through --dist.
	if (( nproc_per_node > 1 )); then
		to_be_distributed=True
	else
		to_be_distributed=False
	fi

	echo "Training started at $(date)"
	if [[ "${to_be_distributed}" == "True" ]]; then
		# Start one process per listed GPU. No --master_port is passed; the
		# launcher is run with --standalone instead.
		echo "Multi-GPU mode received..."
		CUDA_VISIBLE_DEVICES="${devices}" \
		torchrun --standalone --nproc_per_node "${nproc_per_node}" \
			train.py --ckpt_dir "ckpt/${method}" --epochs "${epochs}" \
			--testsets "${testsets}" \
			--dist "${to_be_distributed}" \
			--resume xx/xx-epoch_244.pth
		# --use_accelerate
	else
		echo "Single-GPU mode received..."
		CUDA_VISIBLE_DEVICES="${devices}" \
		python train.py --ckpt_dir "ckpt/${method}" --epochs "${epochs}" \
			--testsets "${testsets}" \
			--dist "${to_be_distributed}" \
			--resume xx/xx-epoch_244.pth
		# --use_accelerate
	fi

	# NOTE(review): the training command's exit status is not propagated; the
	# script's status is that of this final echo, as before.
	echo "Training finished at $(date)"