# Repository: yuccaaa/nas
# File: nas/EasyR1/examples/config.yaml (2.47 kB)
# Revision: 052f594 - "Add files using upload-large-folder tool"

	data:
	train_files: hiyouga/math12k@train
	val_files: hiyouga/math12k@test
	prompt_key: question
	answer_key: answer
	image_key: images
	image_dir: null
	max_prompt_length: 8192
	max_response_length: 16384
	rollout_batch_size: 128
	val_batch_size: 256
	format_prompt: /nas/shared/kilab/wangyujia/EasyR1/examples/format_prompt/bio_format.jinja
	override_chat_template: null
	shuffle: true
	seed: 1
	min_pixels: 262144
	max_pixels: 4194304
	filter_overlong_prompts: true

	algorithm:
	adv_estimator: grpo
	disable_kl: false
	use_kl_loss: true
	kl_penalty: low_var_kl
	kl_coef: 1.0e-2

	worker:
	actor:
	global_batch_size: 64
	micro_batch_size_per_device_for_update: 2 #跟gpu显存相关
	micro_batch_size_per_device_for_experience: 16
	max_grad_norm: 1.0
	padding_free: true
	ulysses_sequence_parallel_size: 1
	model:
	model_path: Qwen/Qwen2.5-7B-Instruct
	enable_gradient_checkpointing: true
	trust_remote_code: false
	freeze_vision_tower: false
	optim:
	lr: 1.0e-6
	weight_decay: 1.0e-2
	strategy: adamw # {adamw, adamw_bf16}
	lr_warmup_ratio: 0.0
	fsdp:
	enable_full_shard: true
	enable_cpu_offload: false
	enable_rank0_init: true
	offload:
	offload_params: true # true: more CPU memory; false: more GPU memory
	offload_optimizer: true # true: more CPU memory; false: more GPU memory

	rollout:
	n: 5
	temperature: 1.0
	top_p: 0.99
	gpu_memory_utilization: 0.6
	enforce_eager: false
	enable_chunked_prefill: false
	tensor_parallel_size: 2
	limit_images: 0
	max_num_batched_tokens: 24576
	val_override_config:
	temperature: 0.5
	n: 1

	ref:
	fsdp:
	enable_full_shard: true
	enable_cpu_offload: true # true: more CPU memory; false: more GPU memory
	enable_rank0_init: true
	offload:
	offload_params: false

	reward:
	reward_type: batch
	reward_function: /nas/shared/kilab/wangyujia/EasyR1/examples/reward_function/bio.py:compute_score

	trainer:
	total_epochs: 15
	max_steps: null
	project_name: easy_r1
	experiment_name: qwen2_5_7b_math_grpo
	logger: ["console", "wandb"]
	nnodes: 1
	n_gpus_per_node: 8
	val_freq: 5 # -1 to disable
	val_before_train: true
	val_only: false
	val_generations_to_log: 3
	save_freq: 5 # -1 to disable
	save_limit: 3 # -1 to disable
	save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio_06162150
	load_checkpoint_path: null