# text2text/config/infer.yaml
# braindeck
# Initial commit
# bcdf9fa
# Hardware layout for the run: one node with one GPU.
trainer:
  nnodes: 1
  n_gpus_per_node: 1
# Dataset input and generation output locations.
data:
  path: ./data/parquet/test.parquet
  # presumably the parquet column holding the prompt; confirm against the loader
  prompt_key: prompt
  n_samples: 1
  output_path: ./checkpoints/grammar_generation.parquet
  batch_size: 1
# Model to load for generation.
model:
  path: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  external_lib: null
  # NOTE(review): load_param / load_param_path look like a paired
  # "load extra weights from this path" switch; confirm with the consumer.
  load_param: false
  load_param_path: null
# Rollout (generation) backend and sampling settings.
rollout:
  name: vllm
  mode: sync  # sync: LLM, async: AsyncLLM
  # NOTE(review): 0.0 presumably means greedy decoding; confirm how it
  # interacts with do_sample below.
  temperature: 0.0
  top_k: -1  # 0 for hf rollout, -1 for vllm rollout
  top_p: 1.0
  max_loras: 1
  prompt_length: 1800
  response_length: 512
  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.9  # raised so the cache has room to allocate
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  # Must hold a full prompt plus a full response:
  # prompt_length + response_length = 1800 + 512 = 2312.
  # Keep this in sync whenever either length above changes.
  max_model_len: 2312
  max_num_seqs: 1024
  # will be deprecated, use log_prob_micro_batch_size_per_gpu
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 1
  # for fire vllm rollout
  use_fire_sampling: false  # enable FIRE https://arxiv.org/abs/2410.21236
  # for hf rollout
  do_sample: true
  disable_log_stats: false
  # OK because max_num_batched_tokens (8192) >= max_model_len (2312)
  enable_chunked_prefill: true
  n: 1
  # if beam search activated, top_k, temperature and top_p will be ignored
# Actor-side parallelism settings.
actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1  # sp size
  fsdp_config:
    # NOTE(review): -1 presumably means "derive the size automatically";
    # confirm with the consumer.
    fsdp_size: -1
# Ray initialization options.
ray_init:
  # `None` (null) means using all CPUs, which might cause a hang on systems
  # with limited resources such as SLURM. Set it to an allowed number there.
  num_cpus: null