Spaces:

braindeck
/

text2text

Paused

text2text / config /infer.yaml

braindeck

Initial commit

bcdf9fa 4 months ago

1.67 kB

	# Cluster shape for the run: number of nodes and GPUs per node.
	trainer:
	  nnodes: 1
	  n_gpus_per_node: 1

	# Input prompts and where the generated output is written.
	data:
	  path: ./data/parquet/test.parquet
	  # presumably the name of the parquet column holding the prompt; verify against the loader
	  prompt_key: prompt
	  n_samples: 1
	  output_path: ./checkpoints/grammar_generation.parquet
	  batch_size: 1

	# Model to generate with, plus optional parameter-loading switches.
	model:
	  path: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
	  external_lib: null
	  load_param: false
	  load_param_path: null

	# Sampling and engine settings for the rollout (generation) backend.
	rollout:
	  name: vllm
	  mode: sync  # sync: LLM, async: AsyncLLM
	  temperature: 0.0
	  top_k: -1  # 0 for hf rollout, -1 for vllm rollout
	  top_p: 1.0
	  max_loras: 1
	  prompt_length: 1800
	  response_length: 512
	  # for vllm rollout
	  dtype: bfloat16  # should align with FSDP
	  gpu_memory_utilization: 0.9  # raised to give the cache room to allocate
	  ignore_eos: false
	  enforce_eager: true
	  free_cache_engine: true
	  load_format: dummy_dtensor
	  tensor_model_parallel_size: 1
	  max_num_batched_tokens: 8192
	  # Must be ≥ prompt_length + response_length (1800 + 512 = 2312);
	  # the previous value of 1800 left no room for the response after a
	  # full-length prompt.
	  max_model_len: 2312
	  max_num_seqs: 1024
	  # will be deprecated, use log_prob_micro_batch_size_per_gpu
	  log_prob_micro_batch_size: null
	  log_prob_micro_batch_size_per_gpu: 1
	  # for fire vllm rollout
	  use_fire_sampling: false  # enable FIRE https://arxiv.org/abs/2410.21236
	  # for hf rollout
	  do_sample: true
	  disable_log_stats: false
	  # max_num_batched_tokens (8192) ≥ max_model_len (2312)
	  enable_chunked_prefill: true
	  n: 1
	  # if beam search activated, top_k, temperature and top_p will be ignored

	# Actor-side parallelism settings.
	actor:
	  strategy: fsdp  # This is for backward-compatibility
	  ulysses_sequence_parallel_size: 1  # sp size
	  fsdp_config:
	    fsdp_size: -1

	# Options for Ray initialisation.
	ray_init:
	  # `None` (null) means use all CPUs, which might cause a hang when CPUs
	  # are limited by the system (e.g. under SLURM). In that case, set this
	  # to the number of CPUs you are allowed to use.
	  num_cpus: null