| { | |
| "adapter_path": "trail-nexus-ai/models/qwen3-0.6b-routing-specialist/adapters", | |
| "batch_size": 2, | |
| "config": "trail-nexus-ai/models/qwen3-0.6b-routing-specialist/lora_config.yaml", | |
| "data": "trail-nexus-ai/training/data/routing_sft_v1", | |
| "fine_tune_type": "lora", | |
| "grad_accumulation_steps": 2, | |
| "grad_checkpoint": true, | |
| "iters": 4000, | |
| "learning_rate": 5e-05, | |
| "lora_parameters": { | |
| "alpha": 32, | |
| "dropout": 0.05, | |
| "keys": [ | |
| "self_attn.q_proj", | |
| "self_attn.v_proj", | |
| "self_attn.k_proj", | |
| "self_attn.o_proj" | |
| ], | |
| "rank": 16, | |
| "scale": 1.0 | |
| }, | |
| "lr_schedule": null, | |
| "mask_prompt": false, | |
| "max_seq_length": 2048, | |
| "model": "/Users/justinraj/.cache/huggingface/hub/models--Qwen--Qwen3-0.6B/snapshots/c1899de289a04d12100db370d81485cdf75e47ca", | |
| "num_layers": 16, | |
| "optimizer": "adam", | |
| "optimizer_config": { | |
| "adam": {}, | |
| "adamw": {}, | |
| "muon": {}, | |
| "sgd": {}, | |
| "adafactor": {} | |
| }, | |
| "project_name": null, | |
| "report_to": null, | |
| "resume_adapter_file": null, | |
| "save_every": 500, | |
| "seed": 42, | |
| "steps_per_eval": 200, | |
| "steps_per_report": 10, | |
| "test": false, | |
| "test_batches": 500, | |
| "train": true, | |
| "val_batches": 25 | |
| } |