tfm_reproduction_CCT5 / scripts /finetune_CodeReview.sh
clouds125's picture
Upload folder using huggingface_hub
768d31a verified
#!/bin/bash
# Fine-tune (or only evaluate) a model on the CodeReview dataset.
# All paths below are anchored at the directory the script is launched from.
CURRENT_DIR="$(pwd)"

# Must be exported: a plain assignment stays private to this shell and would
# never reach the python child process that actually loads NCCL.
export NCCL_DEBUG=INFO

# Defaults; the -g and -e command-line options override GPU_ID and
# MODEL_PATH / EVAL_FLAG respectively.
GPU_ID=0
# Not referenced elsewhere in the visible part of this script except to
# derive MODEL_PATH.
PRETRAINED_MODEL_DIR="$CURRENT_DIR/models/pre-training/Gen"
# Checkpoint loaded before fine-tuning (the pre-trained weights by default).
MODEL_PATH="$PRETRAINED_MODEL_DIR/pytorch_model.bin"
# Checkpoint used when -e is given without an explicit path.
FINETUNED_MODEL_PATH="$CURRENT_DIR/models/fine-tuning/CodeReview/pytorch_model.bin"
# "false" = train then test; anything else = test only.
EVAL_FLAG=false
# Print a one-line usage summary on stderr and terminate with status 1.
usage() {
  printf '%s\n' "Usage: ${0} [-g] [-e]" >&2
  exit 1
}
# Parse the command-line options.
#   -g <gpu_id>        stored in GPU_ID.
#   -e [<model_path>]  evaluation only (EVAL_FLAG=true). With a path, that
#                      checkpoint becomes MODEL_PATH; without one (only
#                      possible when -e is the last word on the command
#                      line) FINETUNED_MODEL_PATH is used instead.
# Globals written: GPU_ID, MODEL_PATH, EVAL_FLAG.
# Exits with status 1 on an unknown option or when -g lacks its value.
parse_args() {
  # A local OPTIND keeps repeated calls independent of each other.
  local opt OPTIND=1
  # The leading ':' selects silent mode: a missing argument is reported as
  # opt=':' and an unknown option as opt='?', both with the option letter
  # in OPTARG.
  while getopts ":g:e:" opt; do
    case "$opt" in
      g)
        GPU_ID="$OPTARG"
        ;;
      e)
        MODEL_PATH="$OPTARG"
        EVAL_FLAG=true
        ;;
      \?)
        # if invalid option is provided, print error message and exit
        echo "Invalid option: -$OPTARG" >&2
        exit 1
        ;;
      :)
        if [[ "$OPTARG" == e ]]; then
          # -e without a parameter: evaluate the default fine-tuned model
          EVAL_FLAG=true
          MODEL_PATH="$FINETUNED_MODEL_PATH"
        else
          # Any other option missing its value is an error; it must not
          # silently switch the run into evaluation mode.
          echo "Option -$OPTARG requires an argument" >&2
          exit 1
        fi
        ;;
    esac
  done
}
parse_args "$@"
# Launch src/fine_tuning/finetune_msg_gen.py on the CodeReview dataset.
# Globals read: EVAL_FLAG, CURRENT_DIR, MODEL_PATH, GPU_ID.
# When EVAL_FLAG is "false" the run trains and then tests
# (--do_train --do_test); for any other value it only tests (--do_test)
# the checkpoint given by MODEL_PATH.
# Returns: the exit status of the python process.
function finetune() {
  # NOTE(review): the CodeReview task is driven by the msg_gen script here;
  # confirm that this is the intended entry point.
  local script_path="src/fine_tuning/finetune_msg_gen.py"
  local data_dir="${CURRENT_DIR}/Dataset/fine-tuning/CodeReview"
  # Both modes write to the same directory. No locale-dependent suffix is
  # appended: LANG is the shell's locale variable, not a script setting.
  local output_dir="${CURRENT_DIR}/outputs/models/fine-tuning/CodeReview"

  # The two modes differ only in whether --do_train is passed.
  local -a mode_flags=(--do_test)
  if [[ "$EVAL_FLAG" == false ]]; then
    mode_flags=(--do_train --do_test)
  fi

  # Every expansion is quoted so paths containing spaces stay one argument.
  python "$script_path" \
    "${mode_flags[@]}" \
    --train_filename "${data_dir}/train.jsonl" \
    --dev_filename "${data_dir}/valid.jsonl" \
    --test_filename "${data_dir}/test.jsonl" \
    --model_type codet5_CC \
    --warmup_steps 500 \
    --learning_rate 3e-4 \
    --tokenizer_name Salesforce/codet5-base \
    --model_name_or_path "Salesforce/codet5-base" \
    --load_model_path "$MODEL_PATH" \
    --output_dir "$output_dir" \
    --always_save_model \
    --train_batch_size 32 \
    --gradient_accumulation_steps 4 \
    --eval_batch_size 8 \
    --max_source_length 512 \
    --max_target_length 128 \
    --gpu_id "${GPU_ID}" \
    --save_steps 3000 \
    --log_steps 5 \
    --train_steps 150000 \
    --evaluate_sample_size -1
}
# Entry point: start the fine-tuning / evaluation run.
finetune