# msj19's picture
# Add files using upload-large-folder tool
# c39435c verified
# Launch script for the gated_deltanet 1B training run (gpus=8, nodes=4).
#
# Every key=value pair below is forwarded as-is to train_node.sh, which is
# resolved relative to the current working directory. How each key is
# interpreted is defined in train_node.sh, which is not part of this file.

# Remote-command type for pdsh.
# NOTE(review): presumably consumed by the multi-node launcher -- confirm.
export PDSH_RCMD_TYPE=ssh
# Interface name handed to NCCL for its socket traffic.
export NCCL_SOCKET_IFNAME=bond1
export NCCL_IB_DISABLE=0 # explicitly enable IB

# The final argument line deliberately has NO trailing backslash: a dangling
# continuation would glue whatever line follows (e.g. the disabled block
# below, once uncommented) onto this command as extra arguments.
# NOTE(review): checkpoint= points at checkpoint-2048 inside path=;
# presumably this resumes that run -- confirm against train_node.sh.
bash train_node.sh \
  type=gated_deltanet \
  lr=3e-4 \
  scheduler=cosine_with_min_lr \
  batch=8 \
  update=4 \
  warmup=512 \
  steps=50016 \
  context=2048 \
  gpus=8 \
  nodes=4 \
  path=/mnt/jfzn/msj/train_exp/gdn_1B_a800 \
  project=fla \
  model=configs/gdn_1B.json \
  data=cerebras/SlimPajama-627B \
  name=SlimPajama \
  cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk1/slimp/train,/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk2/slimp/train \
  checkpoint=/mnt/jfzn/msj/train_exp/gdn_1B_a800/checkpoint-2048
# bash train_node.sh \
# type=gdn \
# lr=3e-4 \
# scheduler=cosine_with_min_lr \
# batch=8 \
# update=4 \
# warmup=512 \
# steps=50016 \
# context=2048 \
# gpus=8 \
# nodes=2 \
# path=/mnt/jfzn/msj/train_exp/gdn_1B_hrr4_head10——test \
# project=fla \
# model=configs/gdn_1B.json \
# data=cerebras/SlimPajama-627B \
# name=SlimPajama \
# cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk1/slimp/train,/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk2/slimp/train