{
  "adapter_path": "train/adapters",
  "batch_size": 1,
  "clear_cache_threshold": 0,
  "config": "train/lora_config.yaml",
  "data": "train/data",
  "fine_tune_type": "lora",
  "grad_accumulation_steps": 16,
  "grad_checkpoint": true,
  "iters": 350,
  "learning_rate": 0.0001,
  "lora_parameters": {
    "rank": 32,
    "scale": 20.0,
    "dropout": 0.0,
    "keys": [
      "self_attn.q_proj",
      "self_attn.k_proj",
      "self_attn.v_proj",
      "self_attn.o_proj",
      "mlp.gate_proj",
      "mlp.up_proj",
      "mlp.down_proj"
    ]
  },
  "lr_schedule": {
    "name": "cosine_decay",
    "warmup": 20,
    "arguments": [
      0.0001,
      350,
      1e-06
    ]
  },
  "mask_prompt": true,
  "max_seq_length": 8192,
  "model": "mlx-community/Qwen2.5-14B-Instruct-4bit",
  "num_layers": 16,
  "optimizer": "adamw",
  "optimizer_config": {
    "adam": {},
    "adamw": {},
    "muon": {},
    "sgd": {},
    "adafactor": {}
  },
  "project_name": null,
  "report_to": null,
  "resume_adapter_file": null,
  "save_every": 50,
  "seed": 42,
  "steps_per_eval": 50,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 500,
  "train": true,
  "val_batches": 25
}