---
# Generation / rollout configuration (verl-style).
# Reconstructed into block YAML from a one-line collapsed file; key order and
# values preserved. Booleans canonicalized to lowercase true/false (YAML 1.2).

trainer:
  nnodes: 1
  n_gpus_per_node: 1

data:
  path: ./data/parquet/test.parquet
  prompt_key: prompt
  n_samples: 1
  output_path: ./checkpoints/grammar_generation.parquet
  batch_size: 1

model:
  path: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  external_lib: null
  load_param: false
  load_param_path: null

rollout:
  name: vllm
  mode: sync  # sync: LLM, async: AsyncLLM
  temperature: 0.0
  top_k: -1  # 0 for hf rollout, -1 for vllm rollout
  top_p: 1.0
  max_loras: 1
  prompt_length: 1800
  response_length: 512
  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.9  # allow the KV cache to allocate more memory
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  # NOTE(review): original comment said ">= 1200 + 512", but prompt_length is
  # 1800, so prompt + response = 2312 > 1800 — confirm intended value.
  max_model_len: 1800  # >= 1200 + 512
  max_num_seqs: 1024
  log_prob_micro_batch_size: null  # will be deprecated, use log_prob_micro_batch_size_per_gpu
  log_prob_micro_batch_size_per_gpu: 1
  # for fire vllm rollout
  use_fire_sampling: false  # enable FIRE https://arxiv.org/abs/2410.21236
  # for hf rollout
  do_sample: true
  disable_log_stats: false
  enable_chunked_prefill: true  # OK because 8192 >= 3072
  n: 1  # if beam search activated, top_k, temperature and top_p will be ignored

actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1  # sp size
  fsdp_config:
    fsdp_size: -1

ray_init:
  num_cpus: null  # `None` means using all CPUs, which might cause hang if limited in systems like SLURM. Please set to a number allowed then.