Sombit committed (verified)
Commit f73fcce · 1 Parent(s): a26cede

Upload TrajectoryVLA

Files changed (1):
  1. config.json +21 -59
config.json CHANGED
@@ -1,74 +1,36 @@
 {
   "arch_specifier": "no-align+gelu-mlp",
+  "architectures": [
+    "TrajectoryVLA"
+  ],
   "auto_map": {
-    "AutoConfig": "prismatic_config.TrajectoryVLAConfig"
+    "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
   },
-  "cheat": false,
+  "hf_llm_id": "meta-llama/Llama-2-7b-hf",
   "image_resize_strategy": "letterbox",
+  "image_sizes": [
+    224,
+    224
+  ],
   "llm_backbone_id": "llama2-7b-pure",
   "llm_max_length": 2048,
-  "model_type": "trajectoryvla",
-  "num_timesteps": 6,
+  "model_type": "prismatic",
   "output_projector_states": false,
   "pad_to_multiple_of": 64,
   "pad_token_id": 32000,
-  "prismatic_config": {
-    "architectures": [
-      "TrajectoryVLA"
-    ],
-    "auto_map": {
-      "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
-    },
-    "model_type": "prismatic",
-    "return_dict": false,
-    "torch_dtype": "bfloat16"
-  },
   "return_dict": false,
-  "rotation_components": 9,
-  "seperate_control_proj": true,
-  "text_config": null,
-  "timestep_proj_config": {
-    "num_tokens": 3,
-    "pos_embed_scale": 8,
-    "proj_layers": [
-      128,
-      512,
-      1024
-    ],
-    "time_delta_sec": 0.1
-  },
-  "token_proj_config": {
-    "control_tokens_layers": [
-      4096,
-      2048,
-      1024
-    ],
-    "image_tokens_mode": "vit",
-    "llm_image_tokens_layers": [],
-    "vit_tokens_layers": [
-      2176,
-      1024
-    ]
-  },
-  "token_size": 1024,
-  "transformer_config": {
-    "decoder_block_config": {
-      "dropout": 0.0,
-      "feature_size": 1024,
-      "head_dim": 64,
-      "num_heads": 16
-    },
-    "encoder_block_config": {
-      "feature_size": 1024,
-      "head_dim": 64,
-      "num_heads": 16
-    },
-    "num_blocks": 2,
-    "pos_embed_config": {
-      "embedding_dim": 1024,
-      "num_embeddings": 300
-    }
+  "text_config": {
+    "model_type": "llama"
   },
+  "timm_model_ids": [
+    "vit_large_patch14_reg4_dinov2.lvd142m",
+    "vit_so400m_patch14_siglip_224"
+  ],
+  "timm_override_act_layers": [
+    null,
+    null
+  ],
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
   "use_fused_vision_backbone": true,
   "vision_backbone_id": "dinosiglip-vit-so-224px"