# Launch QLoRA fine-tuning of CodeLlama-7b-Instruct on the smangrul/hug_stack
# dataset (FIM-augmented code completion), single GPU, with W&B logging and
# checkpoint pushes to a private Hub repo.
#
# NOTE: the original had mid-line "\ " sequences (collapsed line continuations);
# "\ " escapes the space and injects a spurious " " argument into argv for every
# flag. Restored to proper backslash-newline continuations, one flag per line.
CUDA_VISIBLE_DEVICES=0 WANDB_PROJECT=personal-code-copilot python3 train.py \
  --model_name_or_path "codellama/CodeLlama-7b-Instruct-hf" \
  --dataset_name "smangrul/hug_stack" \
  --splits "train" \
  --max_seq_len 2048 \
  --max_steps 2000 \
  --save_steps 500 \
  --eval_steps 100 \
  --logging_steps 5 \
  --log_level "info" \
  --logging_strategy "steps" \
  --save_strategy "steps" \
  --push_to_hub \
  --hub_private_repo True \
  --hub_strategy "every_save" \
  --bf16 True \
  --learning_rate 3e-4 \
  --lr_scheduler_type "cosine" \
  --weight_decay 0.1 \
  --warmup_ratio 0.1 \
  --max_grad_norm 1.0 \
  --output_dir "codellama-hugcoder" \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 4 \
  --gradient_checkpointing True \
  --use_reentrant True \
  --dataset_text_field "text" \
  --test_size 0.1 \
  --fim_rate 0.5 \
  --fim_spm_rate 0.5 \
  --use_peft_lora True \
  --lora_r 32 \
  --lora_alpha 64 \
  --lora_dropout 0.1 \
  --lora_target_modules "all-linear" \
  --use_4bit_quantization True \
  --use_nested_quant True \
  --bnb_4bit_compute_dtype "bfloat16" \
  --use_flash_attn True