{ "type": "transformer_flow_matching", "n_obs_steps": 2, "input_features": { "observation.state": { "type": "STATE", "shape": [ 7 ] }, "observation.box": { "type": "STATE", "shape": [ 6, 6 ] }, "observation.images.gripper": { "type": "VISUAL", "shape": [ 3, 400, 640 ] }, "observation.images.front": { "type": "VISUAL", "shape": [ 3, 400, 640 ] }, "observation.images.right": { "type": "VISUAL", "shape": [ 3, 400, 640 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 7 ] } }, "device": "cuda", "use_amp": false, "push_to_hub": true, "repo_id": null, "private": null, "tags": null, "license": null, "pretrained_path": null, "horizon": 128, "n_action_steps": 128, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MEAN_STD", "ACTION": "MEAN_STD" }, "vision_input_size": 384, "num_cameras": 3, "num_vlm_layers": 16, "detection_classes": [ "cube", "container" ], "detection_conf": 0.1, "cameras_for_vision_state_concat": [ "observation.images.front", "observation.images.gripper", "observation.images.right" ], "state_dim": 7, "action_dim": 7, "d_model": 512, "nhead": 8, "num_decoder_layers": 16, "dim_feedforward": 2048, "num_inference_steps": 10, "noise_temporal_correlation": 0.0, "action_dim_weights": [ 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0 ], "pos_decay_lambda": 0.0, "future_steps_weight": 0.3, "optimizer_lr": 2.9390567251306506e-06, "optimizer_betas": [ 0.95, 0.999 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 1e-06, "scheduler_warmup_steps": 1500, "robot_encoder_tokens": 16, "robot_encoder_input_size": 224, "lora_rank": 16, "lora_alpha": 32, "lora_dropout": 0.05, "lora_target_modules": [ "q_proj", "v_proj" ], "vision_lora_num_layers": 8, "training_step": 121000, "training_epoch": 421, "current_lr": 2.9390567251306506e-06, "training_steps_total": 200000 }