# Xin-Rui's picture
# Upload folder using huggingface_hub
# 7155cf2 verified
# Dataset section: input files, column keys, and length/batch limits.
# NOTE(review): indentation was missing in the copy this was restored from;
# every key below is assumed to belong to `data` -- confirm against the
# consuming trainer's schema.
data:
  train_files: ./datasets/train_RL.parquet
  val_files: ./datasets/math500_RL.parquet
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 1024
  max_response_length: 10000
  rollout_batch_size: 256
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144
# Algorithm section: advantage estimator and KL-related switches.
# NOTE(review): nesting restored from a flattened copy; all keys below are
# assumed to be children of `algorithm` -- confirm.
algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2
# Worker section, grouped into actor / rollout / ref / reward sub-sections.
# NOTE(review): the nesting below is reconstructed. The flattened original
# repeated `fsdp`, `offload`, `temperature`, and `n` at one level, which
# cannot coexist in a single mapping (most parsers silently keep the last
# value). Confirm the hierarchy against the consuming trainer's schema.
worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    # `model`, `optim`, `fsdp`, and `offload` are presumed to be actor
    # sub-sections -- TODO confirm.
    model:
      model_path: /path/to/your/model
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    n: 5
    gpu_memory_utilization: 0.8
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Overrides for `temperature` and `n`; presumably applied during
    # validation, judging by the key name -- confirm.
    val_override_config:
      temperature: 0.0
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: true

  reward:
    reward_type: function
    # score_function: math
    score_function: reason_with_in_limit
# Trainer section: run length, logging, hardware layout, validation and
# checkpointing cadence.
# NOTE(review): nesting restored from a flattened copy; all keys below are
# assumed to be children of `trainer` -- confirm.
trainer:
  total_episodes: 8
  logger: ["console", "tensorboard"]
  project_name: 8ratio_v1
  experiment_name: 8ratio_v1
  n_gpus_per_node: 4
  nnodes: 1
  val_freq: -1  # -1 to disable
  val_before_train: false
  val_only: false
  val_generations_to_log: 1
  save_freq: 1  # -1 to disable
  save_limit: 2  # -1 to disable
  save_checkpoint_path: training/8ratio_v1
  load_checkpoint_path: null