AReUReDi / scripts /peptide_new_coupling.sh
Tong Chen
add files
d2693e0
raw
history blame contribute delete
967 Bytes
#!/bin/bash
# Slurm batch job: runs ./peptide/new_coupling.py on a single GPU of the
# dgx-b200 partition. Every path below is relative, so submit this script
# from the directory that contains ./peptide and ./outs.
#
# Job name shown in the queue.
#SBATCH --job-name=new_coupling
# Stdout and stderr go to fixed files under outs/. There is no %j (job id)
# pattern in the names, so successive runs share the same two files.
# Make sure the outs/ directory exists before submitting.
#SBATCH --output=outs/new_coupling.out
#SBATCH --error=outs/new_coupling.err
# Queue to submit to.
#SBATCH --partition=dgx-b200
# One task on one node with one GPU.
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --gpus=1
# NOTE(review): combined with --ntasks=1 this looks like it only acts as an
# upper bound on tasks per node -- confirm against the sbatch docs and
# consider dropping it.
#SBATCH --ntasks-per-node=8
# Memory and CPU cores are requested per allocated GPU.
#SBATCH --mem-per-gpu=128G
#SBATCH --cpus-per-gpu=8

# Size the OpenMP thread pool from the CPUs requested per GPU instead of a
# hard-coded 64: the job asks for 8 CPUs for its single GPU, so 64 threads
# would oversubscribe the allocation. The fallback of 8 mirrors the
# --cpus-per-gpu directive above for runs outside of Slurm.
export OMP_NUM_THREADS="${SLURM_CPUS_PER_GPU:-8}"

# NCCL / UCX communication settings (values unchanged).
# NOTE(review): the HCA list and bond0 interface are presumably specific to
# this cluster's nodes -- verify before reusing the script elsewhere.
# export NCCL_DEBUG=INFO
export NCCL_NVLS_ENABLE=1
export NCCL_IB_ADAPTIVE_ROUTING=1
export NCCL_IB_SL=1
export NCCL_IB_QPS_PER_CONNECTION=2
export NCCL_IB_SPLIT_DATA_ON_QPS=0
export NCCL_IB_HCA=mlx5_15,mlx5_10,mlx5_14,mlx5_13,mlx5_8,mlx5_7,mlx5_9,mlx5_4
export NCCL_SOCKET_IFNAME=bond0
export NCCL_ALGO=RING
export UCX_TLS=rc

# Launch the job; it is the last command, so its exit status becomes the
# exit status of the batch script.
python ./peptide/new_coupling.py \
  --checkpoint ./peptide/ckpt/PepReDi_v2.pt \
  --version 3 \
  --gen_steps 16