---
# GRPO training configuration (data / algorithm / worker / trainer sections).
#
# NOTE(review): this file was reconstructed from a copy whose line breaks and
# indentation had been collapsed. The nesting below is inferred from key order
# and the repeated `fsdp` / `offload` groups -- confirm it against the schema
# of the consuming trainer before relying on it.
#
# NOTE(review): the dataset (`hiyouga/math12k`) and `experiment_name`
# (`..._math_grpo`) refer to math, while `format_prompt`, `reward_function`
# and `save_checkpoint_path` all point at "bio" assets. Confirm which task
# this run is meant for.

data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  # NOTE(review): confirm the dataset really exposes `question` / `answer`
  # columns under these names.
  prompt_key: question
  answer_key: answer
  image_key: images
  image_dir: null
  max_prompt_length: 8192
  max_response_length: 16384
  rollout_batch_size: 128
  val_batch_size: 256
  # Absolute, machine-specific path.
  format_prompt: /nas/shared/kilab/wangyujia/EasyR1/examples/format_prompt/bio_format.jinja
  override_chat_template: null
  shuffle: true
  seed: 1
  min_pixels: 262144
  max_pixels: 4194304
  filter_overlong_prompts: true

algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 64
    micro_batch_size_per_device_for_update: 2  # tune to the available GPU memory
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    n: 5
    temperature: 1.0
    top_p: 0.99
    gpu_memory_utilization: 0.6
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Equals data.max_prompt_length + data.max_response_length (8192 + 16384).
    max_num_batched_tokens: 24576
    val_override_config:
      temperature: 0.5
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: batch
    # Absolute, machine-specific path in `<file>:<function>` form.
    reward_function: /nas/shared/kilab/wangyujia/EasyR1/examples/reward_function/bio.py:compute_score

trainer:
  total_epochs: 15
  max_steps: null
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  logger: ["console", "wandb"]
  nnodes: 1
  n_gpus_per_node: 8
  val_freq: 5  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 3
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_06162150
  load_checkpoint_path: null