{
  "stage": "sft",
  "do_train": true,
  "model_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "dataset": "sql_create_context_v4",
  "template": "llama3",
  "finetuning_type": "lora",
  "lora_target": "all",
  "output_dir": "llama3_lora",
  "per_device_train_batch_size": 2,
  "gradient_accumulation_steps": 4,
  "lr_scheduler_type": "cosine",
  "logging_steps": 10,
  "warmup_ratio": 0.1,
  "save_steps": 1000,
  "learning_rate": 5e-05,
  "num_train_epochs": 100,
  "max_samples": 1000,
  "max_grad_norm": 1.0,
  "quantization_bit": 4,
  "loraplus_lr_ratio": 16.0,
  "fp16": true
}