File size: 653 Bytes
2af0e94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
# Submit a single-node training job to Slurm via sbatch.
#
# Usage: ./qsub_train.sh [NUM_XPUS]
# Example: ./qsub_train.sh 2
#
# NUM_XPUS (default: 1) is substituted at submission time into the job
# script below, where it sets both the GPU request (--gres=gpu:N) and the
# task count (-n N). The job loads the cluster modules, activates the conda
# environment and then runs bash_train.sh from the submission directory.
set -euo pipefail

NUM_XPUS=${1:-1}

# The value is pasted verbatim into #SBATCH directives, so refuse anything
# that is not a positive integer instead of submitting a malformed job.
if [[ ! "${NUM_XPUS}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'Usage: %s [NUM_XPUS]\n' "${0##*/}" >&2
  printf 'error: NUM_XPUS must be a positive integer, got: %s\n' "${NUM_XPUS}" >&2
  exit 2
fi

# --output/--error below point into Logs/ (relative to the submission
# directory); make sure it exists so the job can open its log files.
mkdir -p Logs

# The heredoc delimiter is intentionally unquoted so ${NUM_XPUS} is expanded
# now, by this wrapper. The job's shebang must be the very first line of the
# submitted script, so nothing may precede it inside the heredoc.
sbatch <<EOF
#!/bin/bash -l
#SBATCH --job-name=om-train
#SBATCH --account=AIRR-P51-DAWN-GPU
#SBATCH --partition=pvc9
#SBATCH --nodes=1
#SBATCH --gres=gpu:${NUM_XPUS}
#SBATCH -n ${NUM_XPUS}
#SBATCH --time=24:00:00
#SBATCH --output=Logs/train_%j.out
#SBATCH --error=Logs/train_%j.err

. /etc/profile.d/modules.sh
module purge
module load rhel9/default-dawn

source ~/miniconda3/etc/profile.d/conda.sh
conda activate ~/rds/rds-airr-p51-TWhPgQVLKbA/Env/pub_env/pytorch-xpu

echo "Allocated ${NUM_XPUS} XPU(s)"
bash bash_train.sh
EOF