#!/usr/bin/env bash
# Launches train.sh with the emgla 340M model config on the SlimPajama dataset
# (gpus=8, nodes=1). All settings are passed to train.sh as key=value arguments;
# their exact meaning is defined by train.sh, not by this file.
#
# Optional environment overrides, left disabled. Uncomment as needed:
# export PDSH_RCMD_TYPE=ssh
# export NCCL_SOCKET_IFNAME=bond1
# export NCCL_IB_DISABLE=0  # explicitly enable IB

# One argument per line; the final argument deliberately has no trailing
# backslash so that nothing appended below is swallowed into this command.
bash train.sh \
  type=emgla \
  lr=3e-4 \
  scheduler=cosine_with_min_lr \
  batch=16 \
  update=2 \
  warmup=512 \
  steps=30720 \
  context=2048 \
  gpus=8 \
  nodes=1 \
  path=/mnt/jfzn/msj/train_exp/emgla_340M \
  project=fla \
  model=configs/emgla_340M.json \
  data=cerebras/SlimPajama-627B \
  name=SlimPajama \
  cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk1/slimp/train \
  tasks=run