FuseAI/OpenChat-3.5-7B-Starling-v2.0

Text Generation

text-generation-inference

Model card · Files and versions

AALF committed on Aug 15, 2024

Commit

8661356

·

verified ·

1 Parent(s): bc28f1b

Delete train_openchat_starling.sh

Files changed (1) hide show

train_openchat_starling.sh +0 -54

train_openchat_starling.sh DELETED Viewed

@@ -1,54 +0,0 @@
-#!/bin/bash
-PROJ_PATH=/GLOBALFS/sysu_xjquan_2/yangzy/fusechat
-BASE_MODEL_NAME=openchat_3.5
-TEACHER_MODEL_NAME=starling
-MODEL_PATH=/GLOBALFS/sysu_xjquan_2/yangzy/models/$BASE_MODEL_NAME
-MODEL_SAVE_PATH="${PROJ_PATH}/model_ckpt/${BASE_MODEL_NAME}_${TEACHER_MODEL_NAME}_ckpt1"
-mkdir ${MODEL_SAVE_PATH}
-dataset_dir="${PROJ_PATH}/representations/openchat_starling_internlm_representation_fnan_split0,${PROJ_PATH}/representations/openchat_starling_internlm_representation_fnan_split1,${PROJ_PATH}/representations/openchat_starling_internlm_representation_fnan_split2,${PROJ_PATH}/representations/openchat_starling_internlm_representation_fnan_split3"
-# OpenChat-3.5-7B <-> Starling-LM-7B-alpha
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-torchrun --nproc_per_node=8 --master_port=20001 ${PROJ_PATH}/train/train.py \
-  --model_name_or_path ${MODEL_PATH} \
-  --data_path ${dataset_dir} \
-  --bf16 True \
-  --output_dir ${MODEL_SAVE_PATH} \
-  --num_train_epochs 3 \
-  --per_device_train_batch_size 8 \
-  --per_device_eval_batch_size 8 \
-  --gradient_accumulation_steps 2 \
-  --evaluation_strategy "no" \
-  --save_strategy "epoch" \
-  --save_steps 10000 \
-  --save_total_limit 5 \
-  --learning_rate 5e-6 \
-  --weight_decay 0. \
-  --warmup_ratio 0.03 \
-  --lr_scheduler_type "cosine" \
-  --logging_steps 1 \
-  --fsdp "full_shard auto_wrap" \
-  --fsdp_transformer_layer_cls_to_wrap 'MistralDecoderLayer' \
-  --tf32 True \
-  --model_max_length 2048 \
-  --gradient_checkpointing True \
-  --conv_temp "openchat" \
-  --lazy_preprocess True \
-  --flash_attn_transformers True \
-  --do_train \
-  --do_distill \
-  --distill_with_ref_model True \
-  --distill_with_aligned_model_0 True \
-  --distill_with_aligned_model_1 False \
-  --distill_loss_type "ce" \
-  --distill_teacher_temperature 1.0 \
-  --lm_loss_weight 0.9 \
-  --distill_greater_as_gt True \
-  --distill_greater_as_gt_type hard \
-  --dataloader_num_workers 8 \
-  --remove_unused_columns False
-sleep 60s
-yhcancel 2510