# Runs train.sh with the key=value settings listed below. Each setting is
# passed through verbatim as one positional argument; how a key is interpreted
# is decided by train.sh, which is not part of this file.
#
# Optional environment overrides, currently disabled. Uncomment a line to
# export that variable to the train.sh process:
# export PDSH_RCMD_TYPE=ssh
# export NCCL_SOCKET_IFNAME=bond1
# export TRITON_CACHE_DIR=/mnt/jfzn/msj/triton
#
# Every argument line must end in a bare backslash with nothing after it
# (no trailing spaces or other characters), otherwise the command is cut short
# at that line.
#
# NOTE(review): the final argument line also ends with a continuation
# backslash. If no further arguments follow it in this file, remove that
# backslash so the command cannot absorb anything added below it.
bash train.sh \
  type=gated_deltanet \
  lr=3e-5 \
  scheduler=cosine_with_min_lr \
  batch=8 \
  update=4 \
  warmup=1024 \
  steps=20480 \
  context=2048 \
  gpus=8 \
  nodes=1 \
  path=/mnt/jfzn/msj/train_exp/gdn-finetune \
  project=fla \
  model=configs/gdn_1B.json \
  data=cerebras/SlimPajama-627B \
  name=SlimPajama \
  cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk3/slimp/train \