# clouds125's picture
# Upload folder using huggingface_hub
# 768d31a verified
#!/bin/bash
# Directory the script is launched from; used below as the root for the
# dataset, output and parser paths.
CURRENT_DIR=$(pwd)
# Exported so child processes (the python trainer) actually receive it;
# a plain assignment would stay local to this shell.
export NCCL_DEBUG=INFO
# Default GPU index; overridden by -g|--gpuid.
GPU_ID=0
# Print the accepted command-line syntax to stderr, then terminate the
# script with exit status 1.
usage() {
  printf 'Usage: %s [-g|--gpuid] \n' "${0}" >&2
  exit 1
}
# Parse command-line options into globals.
#   -g|--gpuid VALUE   store VALUE in GPU_ID
# Any other argument, or -g/--gpuid given without a value, calls usage
# (which prints the syntax and exits).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "${1}" in
      -g|--gpuid)
        # Without this guard `shift 2` fails when the flag is the last
        # argument, nothing is consumed, and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          usage
        fi
        GPU_ID=${2}
        shift 2
        ;;
      *)
        usage
        ;;
    esac
  done
}
parse_args "$@"
# Launch the pre-training script with a fixed set of options.
# Globals:
#   CURRENT_DIR (read) - root for the data, output and tree-sitter paths
#   GPU_ID      (read) - value forwarded as --gpu_id
# Returns: the exit status of the python command.
# The script path is relative, so it is resolved against the directory
# the shell is currently in.
pretrain() {
  local -r script_path="src/pre_training/pretrain.py"
  # Every expansion is quoted so a working directory containing spaces or
  # glob characters is passed through as a single argument.
  python "${script_path}" \
    --model_type codet5_CC \
    --warmup_steps 500 \
    --learning_rate 3e-4 \
    --num_train_epochs 30 \
    --model_name_or_path Salesforce/codet5-base \
    --tokenizer_name Salesforce/codet5-base \
    --data_dir "${CURRENT_DIR}/Dataset/pre-training" \
    --output_dir "${CURRENT_DIR}/outputs/models/pre-training" \
    --always_save_model \
    --train_batch_size 32 \
    --gradient_accumulation_steps 4 \
    --eval_batch_size 4 \
    --max_source_length 512 \
    --max_target_length 128 \
    --gpu_id "${GPU_ID}" \
    --mask_rate 0.15 \
    --save_steps 6000 \
    --log_steps 5 \
    --train_steps 800000 \
    --treesitter_path "${CURRENT_DIR}/myParser/my-languages.so"
}
# Entry point: start the pre-training run.
pretrain