#!/usr/bin/env bash
# org_gdn_1B / finetune.sh
# NOTE(review): the lines below were web-page residue captured during download
# (they are not shell and would fail if executed); kept here as comments:
#   msj19's picture
#   Add files using upload-large-folder tool
#   5822512 verified
# Launch fine-tuning of the 1B mask_gdn model on SlimPajama via train.sh.
#
# Uncomment the exports below when running multi-node (pdsh over ssh, NCCL
# bound to the bond1 interface) or to move the Triton kernel cache off the
# default location.
# export PDSH_RCMD_TYPE=ssh
# export NCCL_SOCKET_IFNAME=bond1
# export TRITON_CACHE_DIR=/mnt/jfzn/msj/triton

set -euo pipefail  # propagate a non-zero exit from train.sh to the caller

# Arguments are key=value pairs parsed by train.sh:
#   batch/update  — per-device batch size and gradient-accumulation steps
#   warmup/steps  — warmup steps and total optimizer steps
#   context       — sequence length in tokens
#   path          — output/checkpoint directory for this run
#   cache         — pre-tokenized dataset cache directory
# NOTE: the last argument must NOT end with a continuation backslash —
# previously it did, and the command only parsed correctly because the
# following commented-out line absorbed the continuation.
bash train.sh \
  type=mask_gdn \
  lr=3e-5 \
  scheduler=cosine_with_min_lr \
  batch=8 \
  update=4 \
  warmup=1024 \
  steps=20480 \
  context=2048 \
  gpus=8 \
  nodes=1 \
  path=/mnt/jfzn/msj/train_exp/mask-gdn-test222 \
  project=fla \
  model=configs/mask_gdn_1B.json \
  data=cerebras/SlimPajama-627B \
  name=SlimPajama \
  cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk3/slimp/train
# config=/mnt/jfzn/msj/flash-linear-attention/legacy/training/configs/deepspeed_2node.yaml \
# bash train.sh \
# type=mask_gdn \
# lr=3e-5 \
# scheduler=cosine_with_min_lr \
# batch=16 \
# update=2 \
# warmup=512 \
# steps=30720 \
# context=2048 \
# gpus=8 \
# nodes=1 \
# path=/mnt/jfzn/msj/train_exp/mask_gdn_340M-rank4-method1_byt \
# project=fla \
# model=configs/mask_gdn_340M.json \
# data=cerebras/SlimPajama-627B \
# name=SlimPajama \
# cache=/mnt/jfzn/msj/slim_chunk1 \
# checkpoint=/mnt/jfzn/msj/train_exp/mask_gdn_340M-rank4-method1_byt/checkpoint-14336 \
# # cache=/mnt/jfzn/data/SlimPajama-627B/pre_slimp_chunk1/slimp/train \