| | data: |
| | train_files: hiyouga/math12k@train |
| | val_files: hiyouga/math12k@test |
| | prompt_key: question |
| | answer_key: answer |
| | image_key: images |
| | image_dir: null |
| | max_prompt_length: 8192 |
| | max_response_length: 16384 |
| | rollout_batch_size: 128 |
| | val_batch_size: 256 |
| | format_prompt: /nas/shared/kilab/wangyujia/EasyR1/examples/format_prompt/bio_format.jinja |
| | override_chat_template: null |
| | shuffle: true |
| | seed: 1 |
| | min_pixels: 262144 |
| | max_pixels: 4194304 |
| | filter_overlong_prompts: true |
| |
|
| | algorithm: |
| | adv_estimator: grpo |
| | disable_kl: false |
| | use_kl_loss: true |
| | kl_penalty: low_var_kl |
| | kl_coef: 1.0e-2 |
| |
|
| | worker: |
| | actor: |
| | global_batch_size: 64 |
| | micro_batch_size_per_device_for_update: 2 |
| | micro_batch_size_per_device_for_experience: 16 |
| | max_grad_norm: 1.0 |
| | padding_free: true |
| | ulysses_sequence_parallel_size: 1 |
| | model: |
| | model_path: Qwen/Qwen2.5-7B-Instruct |
| | enable_gradient_checkpointing: true |
| | trust_remote_code: false |
| | freeze_vision_tower: false |
| | optim: |
| | lr: 1.0e-6 |
| | weight_decay: 1.0e-2 |
| | strategy: adamw |
| | lr_warmup_ratio: 0.0 |
| | fsdp: |
| | enable_full_shard: true |
| | enable_cpu_offload: false |
| | enable_rank0_init: true |
| | offload: |
| | offload_params: true |
| | offload_optimizer: true |
| |
|
| | rollout: |
| | n: 5 |
| | temperature: 1.0 |
| | top_p: 0.99 |
| | gpu_memory_utilization: 0.6 |
| | enforce_eager: false |
| | enable_chunked_prefill: false |
| | tensor_parallel_size: 2 |
| | limit_images: 0 |
| | max_num_batched_tokens: 24576 |
| | val_override_config: |
| | temperature: 0.5 |
| | n: 1 |
| |
|
| | ref: |
| | fsdp: |
| | enable_full_shard: true |
| | enable_cpu_offload: true |
| | enable_rank0_init: true |
| | offload: |
| | offload_params: false |
| |
|
| | reward: |
| | reward_type: batch |
| | reward_function: /nas/shared/kilab/wangyujia/EasyR1/examples/reward_function/bio.py:compute_score |
| |
|
| | trainer: |
| | total_epochs: 15 |
| | max_steps: null |
| | project_name: easy_r1 |
| | experiment_name: qwen2_5_7b_math_grpo |
| | logger: ["console", "wandb"] |
| | nnodes: 1 |
| | n_gpus_per_node: 8 |
| | val_freq: 5 |
| | val_before_train: true |
| | val_only: false |
| | val_generations_to_log: 3 |
| | save_freq: 5 |
| | save_limit: 3 |
| | save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_06162150 |
| | load_checkpoint_path: null |
| |
|