MyFastWAM / config.yaml

Add files using upload-large-folder tool

c37e0dd verified 11 days ago

5.17 kB

	output_dir: /datadrive/wjy/ckpt/fastwam_track_libero_0526
	batch_size: 12
	num_workers: 8
	lr_scheduler_type: cosine
	learning_rate: 0.0001
	num_epochs: 10
	max_steps: 20000
	log_every: 10
	save_every: 2000
	eval_every: 200
	eval_num_inference_steps: 10
	gradient_accumulation_steps: 1
	mixed_precision: bf16
	seed: 42
	max_grad_norm: 1.0
	weight_decay: 0.01
	resume: null
	wandb:
	enabled: true
	workspace: null
	project: fast-wam
	name: libero_track_2cam224_1e-4
	group: null
	mode: online
	data:
	train:
	_target_: fastwam.datasets.lerobot.track_robot_video_dataset.TrackRobotVideoDataset
	dataset_dirs:
	- /datadrive2/wjy/dataset/LIBERO_fastwam_with_marker/libero_spatial
	- /datadrive2/wjy/dataset/LIBERO_fastwam_with_marker/libero_object
	- /datadrive2/wjy/dataset/LIBERO_fastwam_with_marker/libero_goal
	- /datadrive2/wjy/dataset/LIBERO_fastwam_with_marker/libero_10
	track_episodes_file: /datadrive2/wjy/dataset/LIBERO_fastwam_with_marker/full_whitelist.txt
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 13
	shape: 13
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_frames: 33
	global_sample_stride: 1
	action_video_freq_ratio: 4
	video_size:
	- 224
	- 448
	camera_key: null
	val_set_proportion: 0.0
	is_training_set: true
	skip_padding_as_possible: false
	concat_multi_camera: horizontal
	processor:
	_target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 13
	shape: 13
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_obs_steps: 33
	num_output_cameras: 2
	action_output_dim: 13
	proprio_output_dim: 8
	delta_action_dim_mask:
	default:
	- true
	- true
	- true
	- true
	- true
	- true
	- false
	- false
	- false
	- false
	- false
	- false
	- false
	action_state_transforms: null
	use_stepwise_action_norm: false
	norm_default_mode: min/max
	norm_exception_mode: null
	identity_dim_mask:
	action:
	default:
	- false
	- false
	- false
	- false
	- false
	- false
	- false
	- true
	- true
	- true
	- true
	- true
	- true
	action_state_merger:
	_target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
	train_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	val_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	text_embedding_cache_dir: ./data/text_embeds_cache/libero
	context_len: 128
	model:
	_target_: fastwam.runtime.create_fastwam_track
	model_id: Wan-AI/Wan2.2-TI2V-5B
	tokenizer_model_id: Wan-AI/Wan2.1-T2V-1.3B
	tokenizer_max_len: 128
	load_text_encoder: false
	proprio_dim: 8
	redirect_common_files: true
	mot_checkpoint_mixed_attn: false
	action_dit_pretrained_path: checkpoints/ActionDiT_linear_interp_Wan22_alphascale_1024hdim.pt
	skip_dit_load_from_pretrain: false
	video_dit_config:
	has_image_input: false
	patch_size:
	- 1
	- 2
	- 2
	in_dim: 48
	hidden_dim: 3072
	ffn_dim: 14336
	freq_dim: 256
	text_dim: 4096
	out_dim: 48
	num_heads: 24
	attn_head_dim: 128
	num_layers: 30
	eps: 1.0e-06
	seperated_timestep: true
	require_clip_embedding: false
	require_vae_embedding: false
	fuse_vae_embedding_in_latents: true
	use_gradient_checkpointing: false
	video_attention_mask_mode: first_frame_causal
	action_conditioned: false
	action_dim: 13
	action_group_causal_mask_mode: group_diagonal
	action_dit_config:
	action_dim: 13
	hidden_dim: 1024
	ffn_dim: 4096
	num_heads: 24
	attn_head_dim: 128
	num_layers: 30
	text_dim: 4096
	freq_dim: 256
	eps: 1.0e-06
	use_gradient_checkpointing: false
	video_scheduler:
	train_shift: 5.0
	infer_shift: 5.0
	num_train_timesteps: 1000
	action_scheduler:
	train_shift: 5.0
	infer_shift: 5.0
	num_train_timesteps: 1000
	prediction_type: velocity
	loss:
	lambda_action: 1.0
	lambda_track: 1.0
	EVALUATION:
	flip_mode: vertical