| use_ray: false | |
| model: Qwen/Qwen2.5-VL-3B-Instruct | |
| dataset: modelscope/competition_math#16 | |
| num_return_sequences: 4 | |
| max_length: 2048 | |
| system: "You are a math model, you should **think step by step** carefully, and always consider the basic math principles to avoid making calculating mistakes. Give the final answer wrapped with \\boxed{{}}" | |
| load_args: false | |
| sampler_engine: vllm | |
| max_new_tokens: 768 | |
| orm_model: math | |
| prm_model: Qwen/Qwen2.5-Math-PRM-7B | |
| override_exist_file: true | |
| num_sampling_batch_size: 4 | |
| top_p: 1.0 | |
| temperature: 1.0 | |
| prm_threshold: 0.8 | |
| output_file: sampling.jsonl | |
| device_groups: | |
| nproc_per_node: 4 | |
| sample_group: | |
| device: GPU | |
| ranks: list(range(0, 2)) | |
| workers: | |
| - sampler | |
| rm_group: | |
| device: GPU | |
| ranks: list(range(2, 4)) | |
| workers: | |
| - prm | |
| - orm | |