| { | |
| "type": "lbm_policy", | |
| "chunk_size": 16, | |
| "n_action_steps": 8, | |
| "n_obs_steps": 2, | |
| "freq": 10.0, | |
| "use_relative_action": true, | |
| "use_relative_pose": false, | |
| "normalization_mapping": { | |
| "visual": "min_max", | |
| "state": "min_max", | |
| "action_pose_gripper": "min_max" | |
| }, | |
| "input_features": { | |
| "observation_images_left_wrist": { | |
| "shape": [ | |
| 3, | |
| 224, | |
| 224 | |
| ], | |
| "dtype": "visual", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_images_right_wrist": { | |
| "shape": [ | |
| 3, | |
| 224, | |
| 224 | |
| ], | |
| "dtype": "visual", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_images_head": { | |
| "shape": [ | |
| 3, | |
| 224, | |
| 224 | |
| ], | |
| "dtype": "visual", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_images_chest": { | |
| "shape": [ | |
| 3, | |
| 224, | |
| 224 | |
| ], | |
| "dtype": "visual", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states": { | |
| "shape": [ | |
| 34 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": { | |
| "observation_states_ee_pose_left": { | |
| "shape": [ | |
| 9 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": "rotation_6d", | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states_joint_angle_left": { | |
| "shape": [ | |
| 7 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states_gripper_left": { | |
| "shape": [ | |
| 1 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states_ee_pose_right": { | |
| "shape": [ | |
| 9 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": "rotation_6d", | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states_joint_angle_right": { | |
| "shape": [ | |
| 7 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "observation_states_gripper_right": { | |
| "shape": [ | |
| 1 | |
| ], | |
| "dtype": "state", | |
| "fps": null, | |
| "rep": null, | |
| "rep_kwargs": null, | |
| "compose": null | |
| } | |
| } | |
| } | |
| }, | |
| "output_features": { | |
| "action": { | |
| "shape": [ | |
| 20 | |
| ], | |
| "dtype": "action_pose_gripper", | |
| "fps": null, | |
| "rep": "rotation_6d", | |
| "rep_kwargs": null, | |
| "compose": { | |
| "action_left": { | |
| "shape": [ | |
| 10 | |
| ], | |
| "dtype": "action_pose_gripper", | |
| "fps": null, | |
| "rep": "rotation_6d", | |
| "rep_kwargs": null, | |
| "compose": null | |
| }, | |
| "action_right": { | |
| "shape": [ | |
| 10 | |
| ], | |
| "dtype": "action_pose_gripper", | |
| "fps": null, | |
| "rep": "rotation_6d", | |
| "rep_kwargs": null, | |
| "compose": null | |
| } | |
| } | |
| } | |
| }, | |
| "device": "cuda", | |
| "use_amp": false, | |
| "mixed_precision": null, | |
| "data_transform_override": null, | |
| "max_action_dim": 20, | |
| "share_obs_encoder": true, | |
| "obs_encoder_group": null, | |
| "noise_scheduler": { | |
| "type": "fm_beta", | |
| "num_train_timesteps": 10, | |
| "alpha": 1.5, | |
| "beta": 1.0, | |
| "s": 0.999, | |
| "clip_sample": false, | |
| "clip_sample_range": 1.0, | |
| "flow_sig_min": 0.0, | |
| "num_inference_steps": 10, | |
| "prediction_type": "velocity", | |
| "path_reversed": false | |
| }, | |
| "vision_encoder": { | |
| "type": "clip_hf_vision_encoder", | |
| "model_name": "openai/clip-vit-base-patch16", | |
| "tune_vision_encoder": true, | |
| "extract_cls_token": true, | |
| "output_projection_dim": null | |
| }, | |
| "text_encoder": { | |
| "type": "clip_hf_text_encoder", | |
| "model_name": "openai/clip-vit-base-patch32", | |
| "tune_text_encoder": false, | |
| "tune_projection_layer": true, | |
| "output_projection_dim": 768, | |
| "extract_eos_token": true | |
| }, | |
| "action_head": { | |
| "type": "lbm_action_head", | |
| "noise_scheduler": { | |
| "type": "flow_matching_beta", | |
| "num_inference_steps": 10, | |
| "alpha": 1.5, | |
| "beta": 1.0, | |
| "s": 0.999, | |
| "input_perturb": 0.0, | |
| "sampling_path": "euler", | |
| "clip_sample": false, | |
| "clip_sample_range": null, | |
| "path_reversed": false | |
| }, | |
| "num_layers": 16, | |
| "hidden_size": 768, | |
| "num_attention_heads": 16, | |
| "dropout": 0.1, | |
| "final_dropout": true, | |
| "max_timestep_buckets": 1000, | |
| "conditioning_dim": 6980, | |
| "max_state_dim": 34, | |
| "max_action_dim": 20, | |
| "max_chunk_size": 16 | |
| }, | |
| "proprioception_dim": 34, | |
| "optimizer_lr": 0.0001, | |
| "optimizer_betas": [ | |
| 0.9, | |
| 0.95 | |
| ], | |
| "optimizer_eps": 1e-08, | |
| "optimizer_weight_decay": 1e-06, | |
| "optimizer_vision_lr": 0.0001, | |
| "optimizer_vision_weight_decay": 1e-06, | |
| "scheduler_name": "cosine", | |
| "scheduler_warmup_steps": 500 | |
| } |