# gated_deltaproduct_layer17 / run_one_node.sh
# msj19's picture
# Add files using upload-large-folder tool
# c39435c verified
# Optional environment overrides, currently disabled. Uncomment if the
# launcher / NCCL needs them on this machine (NOTE(review): presumably only
# relevant for multi-node runs -- confirm before enabling).
# export PDSH_RCMD_TYPE=ssh
# export NCCL_SOCKET_IFNAME=bond1
# export NCCL_IB_DISABLE=0 # explicitly enable IB

# Launch train.sh for a single-node run (nodes=1, gpus=8) using the
# configs/emgla_340M.json model config. Every setting is passed as one
# key=value word; how each key is interpreted is defined by train.sh, which
# is not part of this file.
#
# The last argument deliberately has NO trailing backslash: a dangling line
# continuation would glue whatever line follows onto this command as an
# extra argument.
bash train.sh \
  type=emgla \
  lr=3e-4 \
  scheduler=cosine_with_min_lr \
  batch=16 \
  update=2 \
  warmup=512 \
  steps=30720 \
  context=2048 \
  gpus=8 \
  nodes=1 \
  path=/mnt/jfzn/msj/train_exp/emgla_340M \
  project=fla \
  model=configs/emgla_340M.json \
  data=cerebras/SlimPajama-627B \
  name=SlimPajama \
  cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk1/slimp/train \
  tasks=run