#!/bin/bash

set -eux

NOW=$(date +%Y-%m-%d-%H:%M:%S)
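
# Repo root inside the container; tokens.sh is assumed to export the W&B and
# Hugging Face credentials used below.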
LLM_RECIPES_DIR=/project

source $LLM_RECIPES_DIR/scripts/wmt2024/tokens.sh
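
# Clear stale temp files (presumably from huggingface_hub's HfFileSystem) left
# behind by a previous run.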
rm -f /tmp/hffs-*
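
# Weights & Biases run metadata; `wandb login` picks up WANDB_API_KEY from the
# environment rather than prompting.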
export WANDB_NOTES="Train sample"

wandb login
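
# One worker per GPU: count the GPUs visible on this node.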
NUM_GPU_PER_NODE=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)

NUM_NODES=1

NUM_GPUS=$((${NUM_NODES} * ${NUM_GPU_PER_NODE}))
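
# Sequence settings: 4k training context with a 128k sliding window, so the
# window never truncates attention at this length. DATA_PARALLEL_SIZE is
# informational; under FSDP every GPU acts as a data-parallel rank.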
SEQ_LENGTH=4096

SLIDING_WINDOW_SIZE=131072

DATA_PARALLEL_SIZE=$NUM_GPUS
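
# Batch configuration: the gap between the micro and global batch is closed by
# gradient accumulation (sanity-checked below).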
MICRO_BATCH_SIZE=1

GLOBAL_BATCH_SIZE=320

TRAIN_STEPS=20000

VALID_MICRO_BATCH_SIZE=1
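
# A global batch of 320 with micro-batch 1 implies 320 / NUM_GPUS
# gradient-accumulation steps per optimizer update (40 on an 8-GPU node).
# Sanity check (a minimal sketch; assumes the trainer requires the global
# batch to divide evenly across MICRO_BATCH_SIZE * NUM_GPUS):
if (( GLOBAL_BATCH_SIZE % (MICRO_BATCH_SIZE * NUM_GPUS) != 0 )); then
    echo "GLOBAL_BATCH_SIZE=${GLOBAL_BATCH_SIZE} is not divisible by MICRO_BATCH_SIZE*NUM_GPUS=$((MICRO_BATCH_SIZE * NUM_GPUS))" >&2
    exit 1
fi

# Cosine LR schedule: linear warmup for 500 steps, then decay to MIN_LR over
# the full 20k training steps.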
LR=2e-5

MIN_LR=1e-6

LR_WARMUP_STEPS=500

LR_DECAY_STEPS=$TRAIN_STEPS

WEIGHT_DECAY=0.1

GRAD_CLIP=1.0
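
# Model: Phi-2; a single HF checkpoint directory supplies both the tokenizer
# and the initial weights.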
TOKENIZER_MODEL=/share/pretrained_lm/Phi/Phi-2

BASE_MODEL=$TOKENIZER_MODEL
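
# Output layout: checkpoints and the training log live under SAVE_DIR.
# LOAD_DIR is kept for reference; the launch below resumes from SAVE_DIR.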
LOAD_DIR=$BASE_MODEL

SAVE_DIR=/work/llm_recipes/models/yans-baseline-Phi-2
mkdir -p ${SAVE_DIR}

SAVE_BASE_NAME=$(basename $SAVE_DIR)

LOG_FILE_PATH=$SAVE_DIR/train_${NOW}.log
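
# Datasets as Megatron-style "<weight> <path>" pairs; the leading number
# (presumably this shard's token count) acts as its sampling weight. The
# second TRAIN_DATA_PATH line appends the same ja-wiki shard again, doubling
# its weight; point it at a different shard to mix in another corpus.
# Validation and test reuse the same shard.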
TRAIN_DATA_PATH="519177757 /work/llm_recipes/datasets/bin/baseline_phi2/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document"
TRAIN_DATA_PATH="${TRAIN_DATA_PATH} 519177757 /work/llm_recipes/datasets/bin/baseline_phi2/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document"

VALID_DATA_PATH="519177757 /work/llm_recipes/datasets/bin/baseline_phi2/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document"

TEST_DATA_PATH=${VALID_DATA_PATH}
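
# Drop -e so the cleanup at the end still runs if training exits non-zero.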
set +e

cd $LLM_RECIPES_DIR
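
# Single-node rendezvous settings for torchrun.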
DISTRIBUTED_ARGS="--nproc_per_node $NUM_GPU_PER_NODE --nnodes $NUM_NODES --node_rank 0 --master_addr localhost --master_port 8000"
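
# Launch fine-tuning: FSDP FULL_SHARD with activation checkpointing, bf16
# mixed precision, and the anyprecision (AdamW-style) optimizer. Checkpoints
# are written every 500 steps (10 kept) and uploaded to the Hugging Face repo
# below; --load points at SAVE_DIR so an interrupted run resumes from its own
# checkpoints. Output is teed to the log file.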
torchrun $DISTRIBUTED_ARGS examples/finetuning.py \
    --seq-length ${SEQ_LENGTH} \
    --sliding-window-size ${SLIDING_WINDOW_SIZE} \
    --micro-batch-size ${MICRO_BATCH_SIZE} \
    --valid_micro_batch_size ${VALID_MICRO_BATCH_SIZE} \
    --global-batch-size ${GLOBAL_BATCH_SIZE} \
    --train-iters ${TRAIN_STEPS} \
    --tokenizer-type HFPreTrainedTokenizer \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --train-data-path ${TRAIN_DATA_PATH} \
    --valid-data-path ${VALID_DATA_PATH} \
    --test-data-path ${TEST_DATA_PATH} \
    --lr ${LR} \
    --min-lr ${MIN_LR} \
    --lr-decay-style cosine \
    --lr-warmup-iters ${LR_WARMUP_STEPS} \
    --lr-decay-iters ${LR_DECAY_STEPS} \
    --weight-decay ${WEIGHT_DECAY} \
    --grad-clip-norm ${GRAD_CLIP} \
    --optimizer anyprecision \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --adam-eps 1e-6 \
    --save-interval 500 \
    --eval-interval 500 \
    --eval-iters 10 \
    --bf16 \
    --mixed-precision \
    --base-model ${BASE_MODEL} \
    --save ${SAVE_DIR} \
    --load ${SAVE_DIR} \
    --fsdp-activation-checkpointing \
    --sharding-strategy FULL_SHARD \
    --checkpoint-type LOCAL_STATE_DICT \
    --save-n-checkpoints 10 \
    --upload-all-checkpoints-to-hf \
    --hf-upload-retry-limit 2 \
    --hf-repo-id shirayukikun/$SAVE_BASE_NAME \
    --wandb-entity "keitokudo" \
    --wandb-project "llm_tutorial" \
    --wandb-name ${SAVE_BASE_NAME}_train_${NOW} 2>&1 | tee $LOG_FILE_PATH
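
# Final cleanup of Hugging Face temp files left by the checkpoint uploads.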
rm -f /tmp/hffs-*