{
  "adapter_path": "adapters/turn80",
  "alpha": [
    1e-05
  ],
  "batch_size": 1,
  "beta": 0.02,
  "config": null,
  "data": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/strat",
  "delta": 50.0,
  "dpo_cpo_loss_type": "sigmoid",
  "epochs": null,
  "epsilon": 0.0001,
  "epsilon_high": 0.02,
  "fuse": true,
  "grad_checkpoint": false,
  "gradient_accumulation_steps": 2,
  "group_size": 2,
  "grpo_loss_type": "dr_grpo",
  "importance_sampling_level": "token",
  "iters": 1000,
  "judge": "mlx-community/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2-4-bit",
  "judge_config": {},
  "learning_rate": 1e-06,
  "list_reward_functions": false,
  "load_in_4bits": true,
  "load_in_6bits": false,
  "load_in_8bits": false,
  "lora_parameters": {
    "rank": 64,
    "alpha": 128,
    "dropout": 0.0,
    "scale": 2.0
  },
  "lr_schedule": null,
  "mask_prompt": false,
  "max_completion_length": 512,
  "max_seq_length": 2048,
  "model": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
  "num_layers": -1,
  "optimizer": "adamw",
  "optimizer_config": {
    "adam": {},
    "adamw": {},
    "muon": {},
    "qhadam": {}
  },
  "reference_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen-4B-Thinking-2507.z",
  "resume_adapter_file": "adapters/turn80/adapters.safetensors",
  "reward_functions": "r1_semantic_similarity_reward,r1_conditional_content_reward,r1_velocity_to_correct_thinking_reward,r1_format_reward,r1_tag_structure_reward,r1_thinking_quality_reward",
  "reward_functions_file": null,
  "reward_scaling": 1.0,
  "reward_weights": "[0.25, 0.25, 0.20, 0.10, 0.10, 0.10]",
  "save_every": 6,
  "seed": 360,
  "steps_per_eval": 50,
  "steps_per_report": 1,
  "temperature": 0.8,
  "test": false,
  "test_batches": 500,
  "train": true,
  "train_mode": "grpo",
  "train_type": "lora",
  "val_batches": 1,
  "wandb": "mlx-lm-grpo-v3.16"
}