Robotics
LeRobot
Safetensors
vla
Tsagkas's picture
Upload policy weights, train config and readme
5ec58b7 verified
{
"type": "vla",
"n_obs_steps": 1,
"input_features": {
"observation.state": {
"type": "STATE",
"shape": [
7
]
},
"observation.images.wrist": {
"type": "VISUAL",
"shape": [
3,
256,
256
]
},
"observation.images.front": {
"type": "VISUAL",
"shape": [
3,
256,
256
]
},
"observation.vfm_features.wrist": {
"type": "STATE",
"shape": [
16,
16,
768
]
},
"observation.vfm_features.front": {
"type": "STATE",
"shape": [
16,
16,
768
]
},
"observation.skill_feature": {
"type": "STATE",
"shape": [
1024
]
},
"observation.object_feature": {
"type": "STATE",
"shape": [
1024
]
},
"observation.vlm_features.wrist": {
"type": "STATE",
"shape": [
16,
16,
1024
]
},
"observation.vlm_features.front": {
"type": "STATE",
"shape": [
16,
16,
1024
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
7
]
}
},
"device": "cuda:0",
"use_amp": false,
"push_to_hub": true,
"repo_id": "Tsagkas/vla_state_modulation_v3_variability",
"private": null,
"tags": null,
"license": null,
"pretrained_path": null,
"num_cameras": 2,
"chunk_size": 100,
"n_action_steps": 100,
"state_dim": 7,
"action_dim": 7,
"state_hidden_dim": 256,
"state_output_dim": 768,
"num_transformer_heads": 12,
"transformer_hidden_dim": 768,
"transformer_expansion_factor": 4,
"attention_probs_dropout_prob": 0.0,
"hidden_dropout_prob": 0.1,
"temporal_ensemble_coeff": 0.01,
"optimizer_lr": 0.0001,
"optimizer_weight_decay": 0.0001,
"normalization_mapping": {
"VISUAL": "IDENTITY",
"STATE": "MEAN_STD",
"ACTION": "MEAN_STD"
}
}