{
  "num_samples_train": 1944,
  "world_size": 1,
  "effective_batch_size": 32,
  "steps_per_epoch": 61,
  "save_steps": 7,
  "saves_per_epoch": 8,
  "total_steps_est": 61,
  "approx_ckpts": 9,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj"
  ],
  "lora_r": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "use_max_len": 16384,
  "skip_tool_only_assistant": false,
  "assistant_tag": "<|start_header_id|>assistant<|end_header_id|>",
  "tool_use_token": "<|use_tool|>",
  "merge_lora_final": false,
  "enable_span_loss_weighting": false,
  "tool_span_loss_weight": 0.3
}