Upload config.yaml with huggingface_hub

92cc4ec verified 7 days ago

4.24 kB

	custom_eval:
	comparisons_per_task: 5
	confusion_matrix:
	- mw
	custom_eval_random_seed: 42
	eval_types:
	- reward_alignment
	- policy_ranking
	max_comparisons: null
	num_examples_per_quality_pr: 5
	num_partial_successes: 5
	pad_frames: true
	policy_ranking:
	- amburger66_robotsmith_rbm_task00_robotsmith
	policy_ranking_max_tasks: 100
	quality_preference:
	- mw
	reward_alignment:
	- amburger66_robotsmith_rbm_task00_robotsmith
	reward_alignment_max_trajectories: 10
	subsample_n_frames: null
	use_frame_steps: true
	data:
	data_source_weights: null
	dataloader_num_workers: 8
	dataloader_persistent_workers: true
	dataloader_pin_memory: true
	dataset_preference_ratio: 0.7
	dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
	dataset_type: rbm
	eval_datasets:
	- amburger66_robotsmith_rbm_task00_robotsmith
	eval_subset_size: null
	load_embeddings: false
	max_frames: 16
	max_frames_after_preprocessing: 64
	max_success: 1.0
	max_trajectories: -1
	min_frames_per_trajectory: 5
	min_success: 0.5
	partial_success_threshold: 0.2
	predict_last_frame_partial_progress: false
	preference_strategy_ratio:
	- 1.0
	- 1.0
	- 1.0
	- 1.0
	progress_discrete_bins: 10
	progress_loss_type: discrete
	progress_pred_type: absolute_first_frame
	progress_strategy_ratio:
	- 1.0
	- 1.0
	- 1.0
	- 1.0
	resized_height: null
	resized_width: null
	sample_type_ratio:
	- 1.0
	- 0.0
	- 0.0
	seed: 42
	shuffle: true
	shuffle_progress_frames: false
	train_datasets:
	- amburger66_robotsmith_rbm_task00_robotsmith
	traj_same_source_prob: 0.5
	use_multi_image: true
	use_per_frame_progress_token: true
	debug: false
	logging:
	log_level: INFO
	log_to:
	- wandb
	save_best:
	greater_is_better:
	- true
	- true
	hub_private: false
	hub_save_every: 1000
	hub_token: null
	keep_top_k: 5
	metric_names:
	- eval_rew_align/pearson_amburger66_robotsmith_rbm_task00_robotsmith
	- eval_p_rank/kendall_last_amburger66_robotsmith_rbm_task00_robotsmith
	save_every: 1000
	upload_to_hub: false
	save_model: true
	save_processor: true
	wandb_entity: r-pad
	wandb_mode: null
	wandb_notes: fine-tuning Robometer on RobotSmith
	wandb_project: rbm-finetune-robotsmith
	loss:
	predict_last_frame_progress: false
	progress_discrete_bins: 10
	progress_loss_type: discrete
	success_positive_weight: 1.0
	mode: train
	model:
	average_temporal_patches: true
	base_model_id: Qwen/Qwen3-VL-4B-Instruct
	frame_pooling: mean
	frame_pooling_attn_temperature: 1.0
	model_type: default
	peft_vision_encoder: false
	progress_discrete_bins: 10
	progress_loss_type: discrete
	quantization: false
	rewind: null
	torch_dtype: bfloat16
	train_language_model: true
	train_preference_head: true
	train_progress_head: true
	train_success_head: true
	train_vision_encoder: false
	trust_remote_code: true
	use_multi_image: true
	use_peft: true
	use_per_frame_progress_token: true
	use_unsloth: true
	peft:
	bias: none
	lora_alpha: 64
	lora_dropout: 0.05
	peft_vision_encoder: false
	r: 32
	target_modules:
	- q_proj
	- k_proj
	- v_proj
	- o_proj
	- gate_proj
	- up_proj
	- down_proj
	trainer_cls: rbm_heads
	training:
	beta: 0.1
	bf16: true
	custom_eval_steps: 50
	dataloader_num_workers: 8
	dataloader_persistent_workers: true
	dataloader_pin_memory: true
	ddp_bucket_cap_mb: 25
	ddp_find_unused_parameters: false
	do_eval: true
	eval_steps: 50
	evaluation_strategy: steps
	exp_name: lora_task00
	fp16: false
	gradient_accumulation_steps: 1
	gradient_checkpointing: true
	learning_rate: 2.0e-05
	load_from_checkpoint: robometer/Robometer-4B
	logging_steps: 1
	lr_scheduler_type: cosine
	max_grad_norm: 10.0
	max_seq_length: 1024
	max_steps: 1000
	num_gpus: 2
	num_train_epochs: -1
	output_dir: /data/robometer/logs/task00
	overwrite_output_dir: true
	per_device_eval_batch_size: 16
	per_device_train_batch_size: 8
	predict_pref_progress: true
	prediction_loss_only: true
	remove_unused_columns: false
	resume_from_checkpoint: null
	run_default_eval: false
	save_steps: 200
	save_strategy: 'no'
	vision_encoder_lr: 1.0e-05
	vision_encoder_num_layers: 3
	warmup_ratio: 0.1
	warmup_steps: 0
	weight_decay: 0.01