{ "model": "meta-llama/Llama-3.1-8B-Instruct", "job_name": "test_sft_checkpointing_resume", "project_name": "test_sft_checkpointing_resume", "data_path": "jacpetro/CodeAlpaca-20k-no-input", "push_to_hub": false, "repo_id": "jacpetro/test_resuming_sft", "username": null, "comet_ml_key": null, "train_split": "train", "train_subset": null, "valid_split": null, "valid_subset": null, "add_eos_token": false, "block_size": -1, "model_max_length": 2048, "padding": "left", "trainer": "sft", "use_flash_attention_2": true, "log": [ "comet_ml" ], "disable_gradient_checkpointing": false, "logging_steps": 1, "eval_strategy": "epoch", "save_total_limit": 1, "save_strategy": "steps", "auto_find_batch_size": false, "mixed_precision": "bf16", "lr": 0.0003, "epochs": 3, "batch_size": 1, "warmup_ratio": 0.1, "gradient_accumulation_steps": 16, "optimizer": "adamw_torch", "lr_scheduler_type": "linear", "weight_decay": 0.0, "max_grad_norm": 1.0, "seed": 42, "save_steps": 10, "eval_steps": null, "load_best_model_at_end": false, "resume_from_checkpoint": null, "user_checkpoint_dir": "", "neftune_noise_alpha": null, "use_deepspeed": null, "apply_chat_template": "tokenizer", "torch_dtype": null, "use_torch_compile": false, "quantization": "nf4", "double_quantization": false, "use_peft": "lora", "lora_r": 32, "lora_alpha": 64, "lora_dropout": 0.05, "init_lora_weights": null, "use_rslora": false, "adalora_init_r": 12, "adalora_target_r": 8, "llama_adapter_len": 128, "llama_adapter_layers": 8, "target_modules": null, "merge_adapter": true, "model_ref": null, "dpo_beta": 0.1, "max_prompt_length": 128, "max_completion_length": null, "prompt_text_column": "prompt", "text_column": "messages", "rejected_text_column": "text", "use_unsloth": false }