{
    "type": "lavla",
    "n_obs_steps": 1,
    "input_features": {
        "observation.state": {
            "type": "STATE",
            "shape": [
                6
            ]
        },
        "observation.images.camera1": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        },
        "observation.images.camera2": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        },
        "observation.images.camera3": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                6
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "use_peft": false,
    "push_to_hub": true,
    "repo_id": "Alkatt/LAVLA_S1_integ_test",
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "normalization_mapping": {
        "VISUAL": "MEAN_STD",
        "STATE": "MEAN_STD",
        "ACTION": "MEAN_STD"
    },
    "n_action_steps": 50,
    "chunk_size": 50,
    "max_action_dim": 32,
    "s1_image_resolution": [
        256,
        256
    ],
    "s1_mlp_dim": 512,
    "s1_embedding_dim": 512,
    "s1_num_attention_heads": 4,
    "s1_num_layers": 6,
    "s1_flow_integration_steps": 10,
    "s1_mobile_vit_model": "apple/mobilevit-small",
    "s2_embedding_dim": 1024,
    "optimizer_lr": 1e-05,
    "optimizer_weight_decay": 0.0001
}