---
# Training run configuration with four top-level sections: data, algorithm,
# worker (actor / rollout / ref / reward) and trainer.
# NOTE(review): the nesting below was reconstructed. Keys such as `fsdp`,
# `offload`, `temperature` and `n` occur more than once, so they cannot all
# live in one flat mapping. The layout appears to follow the EasyR1 example
# config; confirm against the schema of the consuming trainer.

# Dataset files, column keys and batching/length limits.
data:
  train_files: ./datasets/train_RL.parquet
  val_files: ./datasets/math500_RL.parquet
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 1024
  max_response_length: 10000
  rollout_batch_size: 256
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304  # 2048 * 2048
  min_pixels: 262144  # 512 * 512

# Advantage estimator and KL settings.
algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      # Placeholder; replace with the real model location before running.
      model_path: /path/to/your/model
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    n: 5
    gpu_memory_utilization: 0.8
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Sampling overrides; presumably applied during validation (TODO confirm).
    val_override_config:
      temperature: 0.0
      n: 1

  ref:
    # NOTE(review): both fsdp.enable_cpu_offload and offload.offload_params
    # are true here; confirm this combination is intended.
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: true

  reward:
    reward_type: function
    # score_function: math
    score_function: reason_with_in_limit

trainer:
  total_episodes: 8
  logger: ["console", "tensorboard"]
  project_name: 8ratio_v1
  experiment_name: 8ratio_v1
  n_gpus_per_node: 4
  nnodes: 1
  val_freq: -1  # -1 to disable
  val_before_train: false
  val_only: false
  val_generations_to_log: 1
  save_freq: 1  # -1 to disable
  save_limit: 2  # -1 to disable
  save_checkpoint_path: training/8ratio_v1
  load_checkpoint_path: null