# ================================
# Choose the case to run.
# ================================
#
# This script only sets plain (non-exported) shell variables and then sources
# CONFIG.sh and SBATCH.sh from the current working directory. Because those
# files are sourced rather than executed, they run in this same shell and can
# see every variable assigned below.
# NOTE(review): which of these variables CONFIG.sh / SBATCH.sh actually read
# is not visible from this file -- confirm there before renaming anything.

# Pipeline-parallel size. Options = [2, 4, 8, 16, 32].
PP=2

# Global batch size. Options = [32, 512].
GBS=32

# Data-parallel size, derived from PP so that PP * DP = 64 for every PP option
# listed above.
# NOTE(review): 64 is presumably the total GPU count (NNODES=8 x 8 per node,
# with TP=1) -- confirm against SBATCH.sh.
DP=$((64 / PP))

# Other params.
# NOTE(review): the abbreviations presumably stand for tensor-parallel size,
# micro-batch size, number of layers, hidden size and number of attention
# heads -- confirm against CONFIG.sh.
TP=1
MBS=1
NLS=32
HS=3840
NAH=32
DDP=local
# The trailing space inside the quotes is kept as in the original value.
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8

# Name of the job (the only variable exported to child processes here).
export JOB_NAME="results_figure_14_pipeline_parallel_size_${PP}_data_parallel_size_${DP}_batch_size_${GBS}"

# Import the configs.
# Fail early instead of going on to submit a job with no configuration loaded.
if [ ! -r "$(pwd)/CONFIG.sh" ]; then
  printf 'error: cannot read %s\n' "$(pwd)/CONFIG.sh" >&2
  exit 1
fi
# shellcheck disable=SC1091 -- file is resolved at run time from the cwd.
. "$(pwd)/CONFIG.sh"

# Submit the job.
# The directory is re-evaluated here (as in the original) in case CONFIG.sh
# changed the working directory.
if [ ! -r "$(pwd)/SBATCH.sh" ]; then
  printf 'error: cannot read %s\n' "$(pwd)/SBATCH.sh" >&2
  exit 1
fi
# shellcheck disable=SC1091 -- file is resolved at run time from the cwd.
. "$(pwd)/SBATCH.sh"

# Always report success once both files have been sourced, regardless of the
# status of the last command inside SBATCH.sh (unchanged from the original).
exit 0