#!/usr/bin/env bash
# HRA / llama / tune.sh
# Uploaded via huggingface_hub (commit 46caca0, verified) — nvan13
# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

# Base model to fine-tune. Previously tried: facebook/opt-125m (small debug model).
# BASE_MODEL="facebook/opt-125m"
BASE_MODEL="meta-llama/Llama-2-7b-hf"
# Training data. Full dataset: ./data/MetaMathQA.json; the 40K subset is used here.
# DATA_PATH="./data/MetaMathQA.json"
DATA_PATH="./data/MetaMathQA-40K.json"
# NOTE(review): this OUTPUT value is overwritten by the active run below before
# it is ever used; kept for parity with the commented-out historical runs.
OUTPUT="output/cp3e5"
# Weights & Biases project that all runs in this script report to.
export WANDB_PROJECT="HRA_MetaMath395"
# python finetune_32.py \
# --model_name_or_path $BASE_MODEL \
# --output_dir $OUTPUT \
# --hrft_r 32 \
# --init_a 1e-4 \
# --eps 1e-4 \
# --add_orth "none" \
# --lamda 1e-4 \
# --data_path $DATA_PATH \
# --dataset_split "train[:100000]"\
# --dataset_field query response \
# --num_train_epochs 2 \
# --per_device_train_batch_size 8 \
# --gradient_accumulation_steps 4 \
# --save_strategy "steps" \
# --save_steps 0 \
# --save_total_limit 1 \
# --learning_rate 3e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.005 \
# --lr_scheduler_type "cosine" \
# --logging_steps 1000 \
# --bf16 True \
# --tf32 True \
# --report_to "none" \
# wandb sync wandb/latest-run
# OUTPUT="output/cp1e5N"
# python finetune_32.py \
# --model_name_or_path $BASE_MODEL \
# --output_dir $OUTPUT \
# --hrft_r 32 \
# --init_a 1e-4 \
# --eps 1e-4 \
# --add_orth "none" \
# --lamda 1e-4 \
# --data_path $DATA_PATH \
# --dataset_split "train[:100000]"\
# --dataset_field query response \
# --num_train_epochs 2 \
# --per_device_train_batch_size 8 \
# --gradient_accumulation_steps 4 \
# --save_strategy "steps" \
# --save_steps 0 \
# --save_total_limit 1 \
# --learning_rate 1e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.005 \
# --lr_scheduler_type "cosine" \
# --logging_steps 1000 \
# --bf16 True \
# --tf32 True \
# --report_to "wandb"
# wandb sync wandb/latest-run
# OUTPUT="output/cpr1"
# python finetune_32.py \
# --model_name_or_path $BASE_MODEL \
# --output_dir $OUTPUT \
# --hrft_r 1 \
# --init_a 1e-4 \
# --eps 1e-4 \
# --add_orth "none" \
# --lamda 1e-4 \
# --data_path $DATA_PATH \
# --dataset_split "train"\
# --dataset_field query response \
# --num_train_epochs 2 \
# --per_device_train_batch_size 32 \
# --gradient_accumulation_steps 1 \
# --save_strategy "steps" \
# --save_steps 0 \
# --save_total_limit 1 \
# --learning_rate 3e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.005 \
# --lr_scheduler_type "cosine" \
# --logging_steps 1000 \
# --bf16 True \
# --tf32 True \
# --report_to "wandb"
# wandb sync wandb/latest-run
# OUTPUT="output/cpr2"
# python finetune_32.py \
# --model_name_or_path $BASE_MODEL \
# --output_dir $OUTPUT \
# --hrft_r 1 \
# --init_a 1e-4 \
# --eps 1e-4 \
# --add_orth "none" \
# --lamda 1e-4 \
# --data_path $DATA_PATH \
# --dataset_split "train"\
# --dataset_field query response \
# --num_train_epochs 3 \
# --per_device_train_batch_size 32 \
# --gradient_accumulation_steps 1 \
# --save_strategy "steps" \
# --save_steps 0 \
# --save_total_limit 1 \
# --learning_rate 3e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.005 \
# --lr_scheduler_type "cosine" \
# --logging_steps 200 \
# --bf16 True \
# --tf32 True \
# --report_to "wandb"
# wandb sync wandb/latest-run
# Active run: HRA fine-tuning of Llama-2-7B on the full MetaMathQA-40K train
# split (rank 32, effective batch size 8 * 4 = 32, lr 1e-5, 2 epochs).
OUTPUT="output/cms3"
# All expansions are quoted (ShellCheck SC2086) so paths with spaces survive.
python finetune_32.py \
    --model_name_or_path "$BASE_MODEL" \
    --output_dir "$OUTPUT" \
    --hrft_r 32 \
    --init_a 1e-4 \
    --eps 1e-4 \
    --add_orth "none" \
    --lamda 1e-4 \
    --data_path "$DATA_PATH" \
    --dataset_split "train" \
    --dataset_field query response \
    --num_train_epochs 2 \
    --per_device_train_batch_size 8 \
    --gradient_accumulation_steps 4 \
    --save_strategy "steps" \
    --save_steps 0 \
    --save_total_limit 1 \
    --learning_rate 1e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.005 \
    --lr_scheduler_type "cosine" \
    --logging_steps 200 \
    --bf16 True \
    --tf32 True \
    --report_to "wandb"
# Record the completion timestamp (YYYY-MM-DD HH:MM:SS) for the run log.
date +"%F %T"
# wandb sync wandb/latest-run