{
  "stage": "sft",
  "do_train": true,
  "finetuning_type": "lora",
  "model": "Qwen/Qwen3-4B-Instruct-2507",
  "dataset": "llamafactory/fiqa",
  "max_samples": null,
  "eval_dataset": null,
  "train_dataset_num_samples": 5500,
  "eval_dataset_num_samples": null,
  "quantization_bit": 4,
  "load_in_4bit": true,
  "load_in_16bit": false,
  "lora_rank": 64,
  "lora_r": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "bias": "none",
  "use_gradient_checkpointing": "unsloth",
  "random_state": 3407,
  "learning_rate": 0.00015,
  "lr_scheduler_type": "cosine",
  "warmup_ratio": 0.05,
  "weight_decay": 0.0,
  "adam_beta2": 0.999,
  "num_train_epochs": 4.0,
  "max_steps": -1,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 4,
  "seq_len": 2048,
  "max_seq_length": 2048,
  "logging_steps": 20,
  "save_strategy": "steps",
  "save_steps": 500,
  "save_total_limit": 2,
  "evaluation_strategy": "no",
  "eval_steps": null,
  "load_best_model_at_end": false,
  "bf16": true,
  "report_to": "none",
  "dataset_num_proc": 4,
  "seed": 3407,
  "output_dir": "/root/jb/personas/finance/fiqa/finance_lora_unsloth_output/20260305_020931",
  "adam_beta1": 0.9,
  "packing": false,
  "dataset_text_field": "text"
}