Spaces:
No application file
No application file
#!/bin/bash
#SBATCH --job-name=wikitext2_any_order
#SBATCH --account=kempner_albergo_lab
#SBATCH --partition=kempner_h100
#SBATCH --nodes=2
#SBATCH --gpus-per-node=4
#SBATCH --ntasks-per-node=4
#SBATCH --mem=100GB
#SBATCH --time=1-00:00:00
#SBATCH --output=slurm_logs/wikitext2/job-%j.out
#
# Multi-node launch of train.py with the config/wikitext2 "any_order" config:
# 2 nodes x 4 tasks per node, 4 GPUs per node (see the #SBATCH lines above).
#
# Usage: sbatch <this script>, from the directory that contains train.py.
# NOTE: the directory named in --output (slurm_logs/wikitext2/) must already
# exist when the job starts; create it before submitting.

# Abort on any failed command or unset variable so a broken rendezvous setup
# fails fast instead of launching every task with empty settings.
set -eu

# Restrict NCCL's socket transport to IPv4.
export NCCL_SOCKET_FAMILY=AF_INET

# Rendezvous address: first hostname of the allocation. SLURM_JOB_NODELIST is
# the current variable name; SLURM_NODELIST is the older spelling and is kept
# as a fallback.
nodelist=${SLURM_JOB_NODELIST:-${SLURM_NODELIST:?not running inside a Slurm allocation}}
# Assign separately from `export` so a scontrol failure is not masked.
hostnames=$(scontrol show hostnames "$nodelist")
MASTER_ADDR=${hostnames%%$'\n'*}   # keep only the first line
export MASTER_ADDR

# Random rendezvous port, picked once here so that every task inherits the
# same value through srun's environment propagation.
MASTER_PORT=$(shuf -i 15000-59999 -n 1)
export MASTER_PORT

# Verbose distributed diagnostics. These are very noisy; presumably meant for
# debugging and can be relaxed for routine runs -- TODO confirm.
export NCCL_DEBUG=INFO
export NCCL_DEBUG_SUBSYS=ALL
export TORCH_DISTRIBUTED_DEBUG=DETAIL

# NODE_RANK has to be derived inside the srun step: the batch shell itself
# runs only on the first node, so exporting SLURM_NODEID from here would hand
# the same node rank to the tasks on every node.
srun bash -c '
  export NODE_RANK="${SLURM_NODEID}"
  exec python train.py --config-path config/wikitext2 --config-name any_order
'