File size: 2,135 Bytes
8331f55
3a954b5
73f20f6
 
 
 
 
 
 
8331f55
73f20f6
 
 
 
 
 
 
8331f55
73f20f6
 
 
 
 
 
 
8331f55
73f20f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8331f55
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "type": "smolvla",
  "n_obs_steps": 1,
  "input_features": {
    "observation.state": {
      "type": "STATE",
      "shape": [
        18
      ]
    },
    "observation.images.head": {
      "type": "VISUAL",
      "shape": [
        3,
        360,
        640
      ]
    },
    "observation.images.left_wrist": {
      "type": "VISUAL",
      "shape": [
        3,
        360,
        640
      ]
    },
    "observation.images.right_wrist": {
      "type": "VISUAL",
      "shape": [
        3,
        360,
        640
      ]
    }
  },
  "output_features": {
    "action": {
      "type": "ACTION",
      "shape": [
        18
      ]
    }
  },
  "device": "cuda",
  "use_amp": true,
  "use_peft": false,
  "push_to_hub": true,
  "repo_id": "Odog16/block_sorting_SmolVLA_policy",
  "private": null,
  "tags": null,
  "license": null,
  "pretrained_path": "lerobot/smolvla_base",
  "chunk_size": 30,
  "n_action_steps": 30,
  "normalization_mapping": {
    "VISUAL": "IDENTITY",
    "STATE": "MEAN_STD",
    "ACTION": "MEAN_STD"
  },
  "max_state_dim": 32,
  "max_action_dim": 32,
  "resize_imgs_with_padding": [
    512,
    512
  ],
  "empty_cameras": 0,
  "adapt_to_pi_aloha": false,
  "use_delta_joint_actions_aloha": false,
  "tokenizer_max_length": 48,
  "num_steps": 10,
  "use_cache": true,
  "freeze_vision_encoder": true,
  "train_expert_only": true,
  "train_state_proj": true,
  "optimizer_lr": 0.0001,
  "optimizer_betas": [
    0.9,
    0.95
  ],
  "optimizer_eps": 1e-08,
  "optimizer_weight_decay": 1e-10,
  "optimizer_grad_clip_norm": 10,
  "scheduler_warmup_steps": 500,
  "scheduler_decay_steps": 18000,
  "scheduler_decay_lr": 2.5e-06,
  "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
  "load_vlm_weights": true,
  "add_image_special_tokens": false,
  "attention_mode": "cross_attn",
  "prefix_length": -1,
  "pad_language_to": "longest",
  "num_expert_layers": -1,
  "num_vlm_layers": 16,
  "self_attn_every_n_layers": 2,
  "expert_width_multiplier": 0.75,
  "min_period": 0.004,
  "max_period": 4.0,
  "rtc_config": null,
  "compile_model": false,
  "compile_mode": "max-autotune"
}