# Generation / rollout configuration.
#
# NOTE(review): the nesting in this file was reconstructed from a flattened
# copy in which indentation had been lost. The rule applied: a key with no
# value opens a mapping, and the keys that follow it are its children.
# `rollout` and `actor` are assumed to be top-level siblings of `model`
# (not children of it) -- confirm against the code that consumes this config.

# Cluster shape.
trainer:
  nnodes: 1
  n_gpus_per_node: 8

# Input prompts and where the generated samples are written.
data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt
  n_samples: 5
  output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
  batch_size: 128

model:
  path: ~/models/Qwen2-7B-Instruct
  external_lib: null

rollout:
  name: vllm
  mode: sync
  temperature: 1.0
  top_k: 50
  top_p: 0.7
  prompt_length: 1536
  response_length: 512

  dtype: bfloat16
  gpu_memory_utilization: 0.5
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  max_model_len: null
  max_num_seqs: 1024
  # NOTE(review): both the global and the per-GPU micro-batch keys are present;
  # presumably the per-GPU one takes effect while this one is null -- confirm.
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 8

  use_fire_sampling: false

  do_sample: true
  disable_log_stats: true
  enable_chunked_prefill: true
  n: 1

actor:
  strategy: fsdp
  ulysses_sequence_parallel_size: 1
  fsdp_config:
    fsdp_size: -1

ray_init:
  num_cpus: null