# Launch QLoRA fine-tuning of CodeLlama-7b-Instruct on the smangrul/hug_stack
# dataset (FIM-augmented code completion), single GPU, with W&B logging and
# checkpoint pushes to a private Hub repo.
#
# NOTE: the original had mid-line "\ " sequences (collapsed line continuations);
# "\ " escapes the space and injects a spurious " " argument into argv for every
# flag. Restored to proper backslash-newline continuations, one flag per line.
CUDA_VISIBLE_DEVICES=0 WANDB_PROJECT=personal-code-copilot python3 train.py \
  --model_name_or_path "codellama/CodeLlama-7b-Instruct-hf" \
  --dataset_name "smangrul/hug_stack" \
  --splits "train" \
  --max_seq_len 2048 \
  --max_steps 2000 \
  --save_steps 500 \
  --eval_steps 100 \
  --logging_steps 5 \
  --log_level "info" \
  --logging_strategy "steps" \
  --save_strategy "steps" \
  --push_to_hub \
  --hub_private_repo True \
  --hub_strategy "every_save" \
  --bf16 True \
  --learning_rate 3e-4 \
  --lr_scheduler_type "cosine" \
  --weight_decay 0.1 \
  --warmup_ratio 0.1 \
  --max_grad_norm 1.0 \
  --output_dir "codellama-hugcoder" \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 4 \
  --gradient_checkpointing True \
  --use_reentrant True \
  --dataset_text_field "text" \
  --test_size 0.1 \
  --fim_rate 0.5 \
  --fim_spm_rate 0.5 \
  --use_peft_lora True \
  --lora_r 32 \
  --lora_alpha 64 \
  --lora_dropout 0.1 \
  --lora_target_modules "all-linear" \
  --use_4bit_quantization True \
  --use_nested_quant True \
  --bnb_4bit_compute_dtype "bfloat16" \
  --use_flash_attn True