{
  "adapter_path": "./qwen3_adapters",
  "batch_size": 1,
  "config": null,
  "data": "./qwen3_math_data",
  "fine_tune_type": "lora",
  "grad_checkpoint": true,
  "iters": 1000,
  "learning_rate": 2e-05,
  "lora_parameters": {
    "rank": 8,
    "dropout": 0.0,
    "scale": 20.0
  },
  "lr_schedule": null,
  "mask_prompt": true,
  "max_seq_length": 3000,
  "model": "Qwen/Qwen3-0.6B",
  "num_layers": 16,
  "optimizer": "adam",
  "optimizer_config": {
    "adam": {},
    "adamw": {}
  },
  "resume_adapter_file": null,
  "save_every": 100,
  "seed": 0,
  "steps_per_eval": 200,
  "steps_per_report": 25,
  "test": true,
  "test_batches": 500,
  "train": true,
  "val_batches": 25,
  "wandb": null
}