# Sampling configuration.
# NOTE(review): this file was reconstructed from a source in which all keys
# were collapsed onto one line (which is not parseable YAML). The scalar
# values are unchanged; the nesting below `device_groups` is inferred from
# key order and naming -- confirm against the consumer's schema.

use_ray: false

# Model, dataset, and generation settings.
model: Qwen/Qwen2.5-VL-3B-Instruct
dataset: modelscope/competition_math#16
num_return_sequences: 4
max_length: 2048
# Double-quoted on purpose: `\\` is a YAML escape for a single backslash.
system: "You are a math model, you should **think step by step** carefully, and always consider the basic math principles to avoid making calculating mistakes. Give the final answer wrapped with \\boxed{{}}"
load_args: false
sampler_engine: vllm
max_new_tokens: 768

# Reward-model settings.
orm_model: math
prm_model: Qwen/Qwen2.5-Math-PRM-7B
prm_threshold: 0.8

# Sampling loop and output settings.
override_exist_file: true
num_sampling_batch_size: 4
top_p: 1.0
temperature: 1.0
output_file: sampling.jsonl

# Device layout.
# `ranks` values are plain strings as far as YAML is concerned; presumably
# the consumer evaluates them as Python expressions -- TODO confirm.
device_groups:
  nproc_per_node: 4
  sample_group:
    device: GPU
    ranks: list(range(0, 2))
    workers:
      - sampler
  rm_group:
    device: GPU
    ranks: list(range(2, 4))
    workers:
      - prm
      - orm