Robotics
LeRobot
Safetensors
smolvla
File size: 2,366 Bytes
354e746
 
6c9d1bc
354e746
 
 
 
e2c9f06
354e746
 
18626d7
354e746
 
92240d5
e2c9f06
 
354e746
 
18626d7
354e746
 
c6d3ae5
e2c9f06
 
354e746
88ffcc3
 
 
 
 
d532b97
 
88ffcc3
354e746
 
 
 
 
 
e2c9f06
354e746
 
 
 
 
 
88ffcc3
354e746
 
 
4d306f0
6c9d1bc
 
354e746
 
d532b97
 
354e746
 
 
 
 
 
 
 
 
 
 
 
 
 
6c9d1bc
354e746
d44d847
354e746
 
 
 
 
 
bcab612
354e746
 
 
 
 
 
 
 
 
 
 
 
 
 
91985c3
354e746
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
{
    "type": "smolvla",
    "n_obs_steps": 1,
    "input_features": {
        "observation.state": {
            "type": "STATE",
            "shape": [
                7
            ]
        },
        "observation.images.camera1": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        },
        "observation.images.camera2": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        },
        "observation.images.camera3": {
            "type": "VISUAL",
            "shape": [
                3,
                400,
                640
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                7
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "push_to_hub": true,
    "repo_id": "ISdept/smolvla-piper",
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": "ISdept/smolvla-piper",
    "chunk_size": 16,
    "n_action_steps": 16,
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "MEAN_STD",
        "ACTION": "MEAN_STD"
    },
    "max_state_dim": 32,
    "max_action_dim": 32,
    "resize_imgs_with_padding": [
        512,
        512
    ],
    "empty_cameras": 0,
    "adapt_to_pi_aloha": false,
    "use_delta_joint_actions_aloha": false,
    "tokenizer_max_length": 48,
    "num_steps": 10,
    "use_cache": true,
    "freeze_vision_encoder": true,
    "train_expert_only": false,
    "train_state_proj": true,
    "optimizer_lr": 1e-05,
    "optimizer_betas": [
        0.9,
        0.95
    ],
    "optimizer_eps": 1e-08,
    "optimizer_weight_decay": 1e-10,
    "optimizer_grad_clip_norm": 10.0,
    "scheduler_warmup_steps": 1000,
    "scheduler_decay_steps": 30000,
    "scheduler_decay_lr": 2.5e-06,
    "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
    "load_vlm_weights": true,
    "add_image_special_tokens": false,
    "attention_mode": "cross_attn",
    "prefix_length": 0,
    "pad_language_to": "max_length",
    "num_expert_layers": 0,
    "num_vlm_layers": 16,
    "self_attn_every_n_layers": 2,
    "expert_width_multiplier": 0.75,
    "min_period": 0.004,
    "max_period": 4.0
}