BudgetThinker_backup / easyr1 /examples /8ratio_v1.yaml

Upload folder using huggingface_hub

7155cf2 verified 4 months ago

2.16 kB

	data:
	train_files: ./datasets/train_RL.parquet
	val_files: ./datasets/math500_RL.parquet
	prompt_key: problem
	answer_key: answer
	image_key: images
	max_prompt_length: 1024
	max_response_length: 10000
	rollout_batch_size: 256
	val_batch_size: -1
	shuffle: true
	seed: 1
	max_pixels: 4194304
	min_pixels: 262144

	algorithm:
	adv_estimator: grpo
	disable_kl: false
	use_kl_loss: true
	kl_penalty: low_var_kl
	kl_coef: 1.0e-2

	worker:
	actor:
	global_batch_size: 128
	micro_batch_size_per_device_for_update: 4
	micro_batch_size_per_device_for_experience: 16
	max_grad_norm: 1.0
	padding_free: true
	ulysses_sequence_parallel_size: 1
	model:
	model_path: /path/to/your/model
	enable_gradient_checkpointing: true
	trust_remote_code: false
	freeze_vision_tower: false
	optim:
	lr: 1.0e-6
	weight_decay: 1.0e-2
	strategy: adamw # {adamw, adamw_bf16}
	lr_warmup_ratio: 0.0
	fsdp:
	enable_full_shard: true
	enable_cpu_offload: false
	enable_rank0_init: true
	offload:
	offload_params: true # true: more CPU memory; false: more GPU memory
	offload_optimizer: true # true: more CPU memory; false: more GPU memory

	rollout:
	temperature: 1.0
	n: 5
	gpu_memory_utilization: 0.8
	enforce_eager: false
	enable_chunked_prefill: false
	tensor_parallel_size: 2
	limit_images: 0
	val_override_config:
	temperature: 0.0
	n: 1

	ref:
	fsdp:
	enable_full_shard: true
	enable_cpu_offload: true # true: more CPU memory; false: more GPU memory
	enable_rank0_init: true
	offload:
	offload_params: true

	reward:
	reward_type: function
	# score_function: math
	score_function: reason_with_in_limit

	trainer:
	total_episodes: 8
	logger: ["console", "tensorboard"]
	project_name: 8ratio_v1
	experiment_name: 8ratio_v1
	n_gpus_per_node: 4
	nnodes: 1
	val_freq: -1 # -1 to disable
	val_before_train: false
	val_only: false
	val_generations_to_log: 1
	save_freq: 1 # -1 to disable
	save_limit: 2 # -1 to disable
	save_checkpoint_path: training/8ratio_v1
	load_checkpoint_path: null