#!/bin/bash
# Fine-tune the YuYan-10B model on the CLUE IFLYTEK classification task,
# sweeping learning rate, batch size, and epoch count (see loop below).

# Number of processes on this node; matches the 8-way tensor-parallel
# degree passed to the training script.
WORLD_SIZE=8

# Single-node launcher arguments for torch.distributed.launch.
# The backslash-newlines inside the quotes are line continuations, so this
# expands to one flat argument string.
DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

# Task name and CLUE IFLYTEK data splits.
TASK="IFLYTEK"
TRAIN_DATA="clue_data/iflytek/train.json"
VALID_DATA="clue_data/iflytek/dev.json"
TEST_DATA="clue_data/iflytek/test.json"
# Pretrained checkpoint directory to fine-tune from.
PRETRAINED_CHECKPOINT="./yuyan-10b"

# BERT WordPiece vocabulary used by the tokenizer.
VOCAB_FILE=bert-vocab.txt
|
|
# Hyper-parameter grid search: 3 learning rates x 3 batch sizes x 5 epoch
# counts = 45 fine-tuning runs, each logged to its own output directory.
for lr in 7e-6 1e-5 2e-5; do
  for bs in 24 16 8; do
    for ep in 2 3 5 7 15; do
      # Timestamp suffix keeps run directories unique across repeated sweeps.
      ct=$(date +"%m%d%H%M%S")
      OUTPUTS_PATH="outputs/${TASK}/yuyan_bs_${bs}_lr_${lr}_ep_${ep}_${ct}"
      if [ ! -d "${OUTPUTS_PATH}" ]; then
        mkdir -p "${OUTPUTS_PATH}"
      else
        echo "dir exist, not mkdir"
      fi
      # Launch one distributed fine-tuning job; tee mirrors console output
      # into the run directory for later inspection.
      # NOTE: $DISTRIBUTED_ARGS is deliberately unquoted so it word-splits
      # into separate launcher arguments.
      python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
        --task "$TASK" \
        --seed 1242 \
        --pretrained-checkpoint "$PRETRAINED_CHECKPOINT" \
        --train-data "$TRAIN_DATA" \
        --valid-data "$VALID_DATA" \
        --test-data "$TEST_DATA" \
        --tokenizer-type BertWordPieceLowerCase \
        --vocab-file "$VOCAB_FILE" \
        --epochs "$ep" \
        --tensor-model-parallel-size 8 \
        --num-layers 48 \
        --hidden-size 4096 \
        --num-attention-heads 64 \
        --micro-batch-size "$bs" \
        --lr "$lr" \
        --lr-decay-style linear \
        --lr-warmup-fraction 0.1 \
        --seq-length 512 \
        --max-position-embeddings 512 \
        --log-interval 10 \
        --eval-interval 600 \
        --eval-iters 20 \
        --weight-decay 1.0e-1 \
        --res-path "${OUTPUTS_PATH}" \
        --fp16 | tee "${OUTPUTS_PATH}/job.log"
    done
  done
done
|
|