---
# Generation / rollout configuration (verl-style).
# Reconstructed into block YAML from a one-line collapsed file; key order and
# values preserved. Booleans canonicalized to lowercase true/false (YAML 1.2).

trainer:
  nnodes: 1
  n_gpus_per_node: 1

data:
  path: ./data/parquet/test.parquet
  prompt_key: prompt
  n_samples: 1
  output_path: ./checkpoints/grammar_generation.parquet
  batch_size: 1

model:
  path: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  external_lib: null
  load_param: false
  load_param_path: null

rollout:
  name: vllm
  mode: sync  # sync: LLM, async: AsyncLLM
  temperature: 0.0
  top_k: -1  # 0 for hf rollout, -1 for vllm rollout
  top_p: 1.0
  max_loras: 1
  prompt_length: 1800
  response_length: 512
  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.9  # allow the KV cache to allocate more memory
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  # NOTE(review): original comment said ">= 1200 + 512", but prompt_length is
  # 1800, so prompt + response = 2312 > 1800 — confirm intended value.
  max_model_len: 1800  # >= 1200 + 512
  max_num_seqs: 1024
  log_prob_micro_batch_size: null  # will be deprecated, use log_prob_micro_batch_size_per_gpu
  log_prob_micro_batch_size_per_gpu: 1
  # for fire vllm rollout
  use_fire_sampling: false  # enable FIRE https://arxiv.org/abs/2410.21236
  # for hf rollout
  do_sample: true
  disable_log_stats: false
  enable_chunked_prefill: true  # OK because 8192 >= 3072
  n: 1  # if beam search activated, top_k, temperature and top_p will be ignored

actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1  # sp size
  fsdp_config:
    fsdp_size: -1

ray_init:
  num_cpus: null  # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.