Adaptive-Block-Forcing / FlexMDM /scripts /wikitext2_any_order.sbatch
Bailan-Alex's picture
Upload folder using huggingface_hub
4f2b2f4 verified
#!/bin/bash
# Slurm batch script: launches train.py with the config/wikitext2 "any_order"
# config through srun, on 2 nodes with 4 GPUs and 4 tasks per node.
#
# Submit from the directory that contains train.py and config/.
# NOTE(review): the slurm_logs/wikitext2 directory named in --output presumably
# has to exist before submission — confirm against the cluster's behaviour.
#SBATCH --job-name=wikitext2_any_order
#SBATCH --account=kempner_albergo_lab
#SBATCH --partition=kempner_h100
#SBATCH --nodes=2
#SBATCH --gpus-per-node=4
#SBATCH --ntasks-per-node=4
#SBATCH --mem=100GB
#SBATCH --time=1-00:00:00
#SBATCH --output=slurm_logs/wikitext2/job-%j.out

# Abort on the first failing command, on unset variables, and on pipeline errors
# so a broken rendezvous setup does not silently reach the training step.
set -euo pipefail

# Restrict NCCL sockets to IPv4.
export NCCL_SOCKET_FAMILY=AF_INET

# Rendezvous address: first hostname of the allocation. Assignment is kept
# separate from `export` so a failing `scontrol` is not masked, and the first
# line is taken with parameter expansion rather than a pipe into `head`.
node_hostnames=$(scontrol show hostnames "$SLURM_NODELIST")
MASTER_ADDR=${node_hostnames%%$'\n'*}
if [[ -z "$MASTER_ADDR" ]]; then
  printf 'error: could not resolve a master host from SLURM_NODELIST=%s\n' \
    "$SLURM_NODELIST" >&2
  exit 1
fi
export MASTER_ADDR

# Rendezvous port: one random port in 15000-59999, chosen once here so that
# every task inherits the same value.
MASTER_PORT=$(shuf -i 15000-59999 -n 1)
export MASTER_PORT

# Verbose NCCL / torch.distributed diagnostics.
export NCCL_DEBUG=INFO
export NCCL_DEBUG_SUBSYS=ALL
export TORCH_DISTRIBUTED_DEBUG=DETAIL

# NODE_RANK is set inside the srun step: SLURM_NODEID expanded in this batch
# shell would be the single value of the node running the script and would be
# inherited unchanged by the tasks on every node. Inside the step each task
# sees its own node's SLURM_NODEID. `exec` replaces the wrapper shell so python
# is the task process itself.
srun bash -c 'export NODE_RANK="$SLURM_NODEID"; exec python train.py --config-path config/wikitext2 --config-name any_order'