# G0-VLA / G0Plus_PP_CKPT / config.yaml
# Uploaded by whitbrunn — commit 5f3ded9 (verified):
# "1231: upload g0plus CKPT in TRT format for pp demo in the wild"
# --- Run-level settings ---
# RNG seed for reproducibility.
seed: 7
# Training checkpoint to resume from (absolute cluster path).
resume_ckpt: /vla_fulltime/jianning.cui/code/GalaxeaFM/runs/merge_pipeline/real/r1lite_g0_pp_bbox_400_tasks/2025-12-22_05-53-31/checkpoints/step_124838.pt
# Hydra-managed output directory, resolved at runtime.
output_dir: ${hydra:runtime.output_dir}
# Interval between checkpoint saves — presumably in optimizer steps; confirm against trainer.
checkpointing_steps: 17834
# Experiment logging (Weights & Biases).
logger:
  type: wandb
  # Emit metrics every N steps.
  log_steps: 10
  # Hydra task choice; project / experiment names are derived from it below
  # via the custom `split` resolver (first and last segment respectively).
  task: ${hydra:runtime.choices.task}
  project: ${split:${logger.task},0}
  experiment_name: ${split:${logger.task},-1}
  mode: online
  workspace: cuijianning1996-galaxea-ai
  dir: ${output_dir}/wandb
# --- Evaluation / deployment settings ---
# Batch size used for validation passes.
batch_size_val: 16
eval_episodes_num: 1
# TensorRT-exported state dict used for the deployment demo.
ckpt_path: /data/trt_ckpts/model_state_dict.pt
# Evaluation environment name — presumably a simulated benchmark task; verify against eval harness.
env: R1ProBlocksStackEasy
target_controller_type: bimanual_relaxed_ik
# Free-form run tags (none set; also re-exported under edp.tags).
tags: null
# Experiment provenance / bookkeeping block; null fields are
# presumably populated at runtime (git info, data roots) — confirm in trainer.
edp:
  card: null
  # Timestamp captured at config resolution time.
  training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S}
  git_branch: null
  git_commit: null
  root: null
  repo_ids: null
  save_dir: ${output_dir}
  tags: ${tags}
  # Mirrors of the model-level training schedule for bookkeeping.
  max_steps: ${model.max_steps}
  batch_size: ${model.batch_size}
# Dataset configuration (Hydra `_target_` instantiation).
data:
  _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset
  dataset_dirs: null
  # Per-modality tensor layout. `raw_shape` is the on-disk size,
  # `shape` the size after preprocessing.
  shape_meta:
    # 6-DoF joints + 1-DoF gripper per arm → 14 action dims total.
    action:
      - key: left_arm
        raw_shape: 6
        shape: 6
      - key: left_gripper
        raw_shape: 1
        shape: 1
      - key: right_arm
        raw_shape: 6
        shape: 6
      - key: right_gripper
        raw_shape: 1
        shape: 1
    # Proprioceptive state mirrors the action layout (14 dims).
    state:
      - key: left_arm
        raw_shape: 6
        shape: 6
      - key: left_gripper
        raw_shape: 1
        shape: 1
      - key: right_arm
        raw_shape: 6
        shape: 6
      - key: right_gripper
        raw_shape: 1
        shape: 1
    # Camera streams: raw 720p frames are resized to 224x224 (CHW);
    # head_condition is already stored at 224x224.
    images:
      - key: head_condition
        raw_shape:
          - 3
          - 224
          - 224
        shape:
          - 3
          - 224
          - 224
      - key: head_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
      - key: left_wrist_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
      - key: right_wrist_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
  # Action horizon (chunk length) per sample; referenced as ${data.action_size}.
  action_size: 32
  past_action_size: 0
  # Observation history length; referenced as ${data.obs_size}.
  obs_size: 1
  ee_start_moving_thresh: 0.0
  # Fraction of episodes held out for validation.
  val_set_proportion: 0.05
  use_bbox_condition: true
  dataset_root: /galaxea_dataset/galaxea/pp_project/lerobot_with_bbox
  # Only dataset folders starting with these prefixes are loaded.
  dataset_prefixes:
    - BENCH
    - Bench
# Model + training configuration.
model:
  pretrained_ckpt: /galaxea_dataset/mnt/tmp/pp_wt_img_cond/checkpoints/org2fm_v2.pt
  use_pretrained_norm_stats: true
  model_weights_to_bf16: false
  enable_bf16_training: true
  use_torch_compile: false
  find_unused_parameters: false
  # Per-device batch size; effective batch = batch_size * grad_accumulation_steps * world size.
  batch_size: 2
  num_workers: 4
  pin_memory: true
  persistent_workers: true
  # max_epochs governs the run length since max_steps is unset.
  max_epochs: 4
  max_steps: null
  grad_accumulation_steps: 2
  use_8bit_optimizer: false
  learning_rate: 2.5e-05
  weight_decay: 1.0e-06
  # Adam betas.
  betas:
    - 0.9
    - 0.999
  lr_scheduler_type: cosine
  warmup_steps: 500
  max_grad_norm: 1.0
  # EMA is disabled; the ema block below is inert but kept for completeness.
  use_ema: false
  ema:
    update_after_step: 0
    power: 0.67
  use_sync_bn: false
  # Input/output processing pipeline (tokenization, normalization, image transforms).
  processor:
    _target_: galaxea_fm.processors.galaxea_zero_processor.GalaxeaZeroProcessor
    shape_meta: ${data.shape_meta}
    num_obs_steps: ${data.obs_size}
    # Convert absolute arm joint targets to deltas relative to the current state.
    action_state_transforms:
      - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform
        keys:
          - left_arm
          - right_arm
    use_stepwise_action_norm: true
    norm_default_mode: z-score
    # Grippers use the "0/100" mode instead of z-score — presumably a fixed
    # min/max range normalization; confirm against the processor implementation.
    norm_exception_mode:
      action:
        left_gripper: 0/100
        right_gripper: 0/100
    action_state_merger:
      _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign
    # Identical resize → tensor → [-1, 1] normalize pipeline for every camera.
    train_transforms:
      head_condition:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      left_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      right_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
    # Validation uses the same deterministic transforms as training.
    val_transforms:
      head_condition:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      left_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      right_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
    # Four camera streams feed the VLM; referenced by model_arch.num_input_images.
    num_output_cameras: 4
    use_zh_instruction: false
    drop_high_level_prob: 1.0
    pad_token_id: ${model.model_arch.pad_token_id}
    image_token_index: ${model.model_arch.image_token_index}
    # PaliGemma tokenizer; loaded from a local snapshot.
    tokenizer_params:
      pretrained_model_name_or_path: /data/google/paligemma-3b-pt-224
      local_files_only: false
      token: null
    max_text_tokens: ${model.model_arch.max_text_tokens}
    max_image_text_tokens: ${model.model_arch.max_image_text_tokens}
    num_input_cameras: ${model.model_arch.num_input_images}
    num_image_tokens_per_camera: ${model.model_arch.vision.num_image_tokens}
  # Network architecture (PaliGemma VLM backbone + flow-matching action expert).
  model_arch:
    _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy
    model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero
    pretrained_model_path: /data/google/paligemma-3b-pt-224
    vla_training_strategy: vla-full-train
    backbone_lr_multiplier: 1.0
    image_token_index: 257152
    pad_token_id: 0
    vocab_size: 257216
    cond_steps: ${data.obs_size}
    horizon_steps: ${data.action_size}
    max_text_tokens: 55
    # Total prompt length = image tokens for all cameras + text budget.
    max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens} + ${model.model_arch.max_text_tokens}'}
    num_input_images: ${eval:'${model.model_arch.cond_steps} * ${model.processor.num_output_cameras}'}
    num_extra_image_tokens_per_camera: 0
    final_action_clip_value: null
    # 14 = (6 arm + 1 gripper) x 2 arms, matching data.shape_meta.
    action_dim: 14
    proprio_dim: 14
    action_decoder_layers: 2
    action_expert_adaptive_mode: null
    flow_sampling: beta
    num_inference_steps: 10
    # SigLIP vision tower (224x224 input, 14x14 patches → 256 image tokens).
    vision:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel
      hidden_size: 1152
      intermediate_size: 4304
      num_hidden_layers: 27
      num_attention_heads: 16
      num_channels: 3
      image_size: 224
      patch_size: 14
      layer_norm_eps: 1.0e-06
      attention_dropout: 0.0
      num_image_tokens: 256
    # Projects vision features (1152) into the VLM embedding space (2048).
    vision_projector:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector
      vision_config:
        hidden_size: 1152
        projection_dim: 2048
    # Joint transformer mixing VLM, proprio, and action expert streams.
    joint:
      name: galaxea_fm.models.galaxea_zero.joint_model.JointModel
      action_expert_adaptive_mode: null
      mixture:
        vlm:
          hidden_size: 2048
          intermediate_size: 16384
          use_final_norm: false
          cache: true
        proprio:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: true
          adaptive_mode: null
        action:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: false
          adaptive_mode: null
      time_hidden_size: 256
      num_hidden_layers: 18
      num_attention_heads: 8
      # Multi-query attention: a single shared key/value head.
      num_key_value_heads: 1
      head_dim: 256
      max_position_embeddings: 8192
      rms_norm_eps: 1.0e-06
      rope_theta: 10000.0
      attention_bias: false
      attention_dropout: 0.0