ddy0126
/

Minigpt4

Model card Files Files and versions

Minigpt4 / training_scripts /stage_3.sh

ddy0126's picture

Upload folder using huggingface_hub

2bcab80 verified 4 days ago

history blame contribute delete

723 Bytes

	#!/bin/bash
	#SBATCH --partition=batch
	#SBATCH --job-name=test
	#SBATCH --output=test.out
	#SBATCH --error=test.err
	#SBATCH --time=23:00:00
	#SBATCH --mem=110G
	#SBATCH --gres=gpu:a100:4
	#SBATCH --cpus-per-task=16
	# ---- Experiment settings consumed by the torchrun launch command below ----
	# Config file handed to train.py via --cfg-path.
	cfg_path='train_configs/224_v2_llama2_video_stage_3.yaml'
	# Experiment name handed to train.py via --job_name.
	job_name='test'
	# Value given to torchrun's --nproc-per-node.
	# NOTE(review): the allocation above requests 4 GPUs (gpu:a100:4) while only
	# 1 process is started here — confirm this mismatch is intentional.
	number_of_gpus=1
	# cd ../../

	# Choose a random port from the kernel's local (ephemeral) port range that
	# no socket is currently listening on, and leave it in PORT.
	# If the sysctl file cannot be read, fall back to the Linux default range.
	read -r LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range \
	  || { LOWERPORT=32768; UPPERPORT=60999; }
	while :; do
	  PORT="$(shuf -i "${LOWERPORT}-${UPPERPORT}" -n 1)"
	  # The pipe and the OR-list must be real shell operators: when they are
	  # backslash-escaped they are passed to ss as literal arguments, grep never
	  # runs and the loop can never reach `break`.
	  # grep succeeds when some listening socket already uses ":PORT "; leave the
	  # loop only when it does not.
	  ss -lpn | grep -q ":${PORT} " || break
	done
	echo "Port is $PORT"
	# Start train.py under torchrun on the selected rendezvous port. Every
	# expansion is double-quoted so a value containing whitespace or glob
	# characters is delivered to torchrun as exactly one argument.
	torchrun --master-port "${PORT}" --nproc-per-node "${number_of_gpus}" train.py --job_name "${job_name}" --cfg-path "${cfg_path}"