zeeshaan-ai committed on
Commit
6eed82a
·
verified ·
1 Parent(s): c75f2b2

Upload model

Browse files
added_tokens.json CHANGED
@@ -1,28 +1,3 @@
1
  {
2
- "</think>": 151668,
3
- "</tool_call>": 151658,
4
- "</tool_response>": 151666,
5
- "<think>": 151667,
6
- "<tool_call>": 151657,
7
- "<tool_response>": 151665,
8
- "<|box_end|>": 151649,
9
- "<|box_start|>": 151648,
10
- "<|endoftext|>": 151643,
11
- "<|file_sep|>": 151664,
12
- "<|fim_middle|>": 151660,
13
- "<|fim_pad|>": 151662,
14
- "<|fim_prefix|>": 151659,
15
- "<|fim_suffix|>": 151661,
16
- "<|im_end|>": 151645,
17
- "<|im_start|>": 151644,
18
- "<|image_pad|>": 151655,
19
- "<|object_ref_end|>": 151647,
20
- "<|object_ref_start|>": 151646,
21
- "<|quad_end|>": 151651,
22
- "<|quad_start|>": 151650,
23
- "<|repo_name|>": 151663,
24
- "<|video_pad|>": 151656,
25
- "<|vision_end|>": 151653,
26
- "<|vision_pad|>": 151654,
27
- "<|vision_start|>": 151652
28
  }
 
1
  {
2
+ "<image_soft_token>": 262144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
config.json CHANGED
@@ -1,89 +1,55 @@
1
  {
2
- "type": "smolvla",
3
- "n_obs_steps": 1,
4
- "input_features": {
5
- "observation.state": {
6
- "type": "STATE",
7
- "shape": [
8
- 6
9
- ]
10
- },
11
- "observation.images.side": {
12
- "type": "VISUAL",
13
- "shape": [
14
- 3,
15
- 1080,
16
- 1920
17
- ]
18
- },
19
- "observation.images.front": {
20
- "type": "VISUAL",
21
- "shape": [
22
- 3,
23
- 480,
24
- 640
25
- ]
26
- }
27
- },
28
- "output_features": {
29
- "action": {
30
- "type": "ACTION",
31
- "shape": [
32
- 6
33
- ]
34
- }
35
- },
36
- "device": "cuda",
37
- "use_amp": false,
38
- "push_to_hub": false,
39
- "repo_id": null,
40
- "private": null,
41
- "tags": null,
42
- "license": null,
43
- "pretrained_path": "lerobot/smolvla_base",
44
- "chunk_size": 50,
45
- "n_action_steps": 50,
46
- "normalization_mapping": {
47
- "VISUAL": "IDENTITY",
48
- "STATE": "MEAN_STD",
49
- "ACTION": "MEAN_STD"
50
- },
51
- "max_state_dim": 32,
52
- "max_action_dim": 32,
53
- "resize_imgs_with_padding": [
54
- 512,
55
- 512
56
  ],
57
- "empty_cameras": 0,
58
- "adapt_to_pi_aloha": false,
59
- "use_delta_joint_actions_aloha": false,
60
- "tokenizer_max_length": 48,
61
- "num_steps": 10,
62
- "use_cache": true,
63
- "freeze_vision_encoder": true,
64
- "train_expert_only": true,
65
- "train_state_proj": true,
66
- "optimizer_lr": 0.0001,
67
- "optimizer_betas": [
68
- 0.9,
69
- 0.95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  ],
71
- "optimizer_eps": 1e-08,
72
- "optimizer_weight_decay": 1e-10,
73
- "optimizer_grad_clip_norm": 10,
74
- "scheduler_warmup_steps": 1000,
75
- "scheduler_decay_steps": 30000,
76
- "scheduler_decay_lr": 2.5e-06,
77
- "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
78
- "load_vlm_weights": false,
79
- "add_image_special_tokens": false,
80
- "attention_mode": "cross_attn",
81
- "prefix_length": -1,
82
- "pad_language_to": "longest",
83
- "num_expert_layers": -1,
84
- "num_vlm_layers": 16,
85
- "self_attn_every_n_layers": 2,
86
- "expert_width_multiplier": 0.75,
87
- "min_period": 0.004,
88
- "max_period": 4.0
89
  }
 
1
  {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "torch_dtype": "bfloat16",
11
+ "eos_token_id": 106,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
  ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_local_base_freq": 10000.0,
47
+ "rope_scaling": null,
48
+ "rope_theta": 1000000.0,
49
+ "sliding_window": 512,
50
+ "unsloth_fixed": true,
51
+ "unsloth_version": "2026.2.1",
52
+ "use_bidirectional_attention": false,
53
+ "use_cache": true,
54
+ "vocab_size": 262144
 
55
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46b647e65da1188b7cec91c3a7f44daeacc01b6497f46acd31a3c4241c7f144e
3
- size 1197789224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81afd0920ea382d06cfe9d0f12af88e9ae8f15152937b2bbcf3ec2823539968c
3
+ size 536223056
special_tokens_map.json CHANGED
@@ -1,28 +1,30 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
  "eos_token": {
18
- "content": "<|im_end|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
 
24
  "pad_token": {
25
- "content": "<|vision_pad|>",
 
 
 
 
 
 
 
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
1
  {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
 
 
 
 
 
 
11
  "eos_token": {
12
+ "content": "<end_of_turn>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
+ "image_token": "<image_soft_token>",
19
  "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
  "lstrip": false,
29
  "normalized": false,
30
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff