{
    "type": "transformer_flow_matching",
    "n_obs_steps": 2,
    "input_features": {
        "observation.state": {
            "type": "STATE",
            "shape": [
                7
            ]
        },
        "observation.box": {
            "type": "STATE",
            "shape": [
                6,
                6
            ]
        },
        "observation.images.gripper": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        },
        "observation.images.front": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        },
        "observation.images.right": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                7
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "push_to_hub": true,
    "repo_id": null,
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "horizon": 64,
    "n_action_steps": 64,
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "MEAN_STD",
        "ACTION": "MEAN_STD"
    },
    "vision_input_size": 384,
    "num_cameras": 3,
    "num_vlm_layers": 16,
    "detection_classes": [
        "cube",
        "container"
    ],
    "detection_conf": 0.1,
    "cameras_for_vision_state_concat": [
        "observation.images.front",
        "observation.images.gripper",
        "observation.images.right"
    ],
    "state_dim": 7,
    "action_dim": 7,
    "d_model": 512,
    "nhead": 8,
    "num_decoder_layers": 16,
    "dim_feedforward": 2048,
    "num_inference_steps": 10,
    "noise_temporal_correlation": 0.0,
    "action_dim_weights": [
        1.0,
        1.0,
        1.0,
        0.0,
        1.0,
        1.0,
        1.0
    ],
    "pos_decay_lambda": 0.0,
    "future_steps_weight": 0.3,
    "optimizer_lr": 2.8e-05,
    "optimizer_betas": [
        0.95,
        0.999
    ],
    "optimizer_eps": 1e-08,
    "optimizer_weight_decay": 1e-06,
    "scheduler_warmup_steps": 1500,
    "robot_encoder_tokens": 16,
    "robot_encoder_input_size": 224,
    "lora_rank": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "lora_target_modules": [
        "q_proj",
        "v_proj"
    ],
    "vision_lora_num_layers": 8,
    "training_step": 119000,
    "training_epoch": 414,
    "current_lr": 8.581771368787766e-06,
    "training_steps_total": 200000
}