Upload config
Browse files- config.json +59 -32
- prismatic_config.py +3 -30
config.json
CHANGED
|
@@ -1,37 +1,64 @@
|
|
| 1 |
{
|
| 2 |
-
"arch_specifier": "no-align+gelu-mlp",
|
| 3 |
-
"architectures": [
|
| 4 |
-
"TrajectoryVLA"
|
| 5 |
-
],
|
| 6 |
"auto_map": {
|
| 7 |
-
"
|
| 8 |
},
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
"text_config": {
|
| 23 |
-
"model_type": "llama"
|
| 24 |
},
|
| 25 |
-
"
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"auto_map": {
|
| 3 |
+
"AutoConfig": "prismatic_config.TrajectoryVLAConfig"
|
| 4 |
},
|
| 5 |
+
"cheat": false,
|
| 6 |
+
"model_type": "trajectoryvla",
|
| 7 |
+
"num_timesteps": 6,
|
| 8 |
+
"prismatic_config": {
|
| 9 |
+
"architectures": [
|
| 10 |
+
"TrajectoryVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {
|
| 13 |
+
"AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
|
| 14 |
+
},
|
| 15 |
+
"model_type": "prismatic",
|
| 16 |
+
"return_dict": false,
|
| 17 |
+
"torch_dtype": "bfloat16"
|
|
|
|
|
|
|
| 18 |
},
|
| 19 |
+
"rotation_components": 9,
|
| 20 |
+
"seperate_control_proj": true,
|
| 21 |
+
"timestep_proj_config": {
|
| 22 |
+
"num_tokens": 3,
|
| 23 |
+
"pos_embed_scale": 8,
|
| 24 |
+
"proj_layers": [
|
| 25 |
+
128,
|
| 26 |
+
512,
|
| 27 |
+
1024
|
| 28 |
+
],
|
| 29 |
+
"time_delta_sec": 0.1
|
| 30 |
+
},
|
| 31 |
+
"token_proj_config": {
|
| 32 |
+
"control_tokens_layers": [
|
| 33 |
+
4096,
|
| 34 |
+
2048,
|
| 35 |
+
1024
|
| 36 |
+
],
|
| 37 |
+
"image_tokens_mode": "vit",
|
| 38 |
+
"llm_image_tokens_layers": [],
|
| 39 |
+
"vit_tokens_layers": [
|
| 40 |
+
2176,
|
| 41 |
+
1024
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"token_size": 1024,
|
| 45 |
+
"transformer_config": {
|
| 46 |
+
"decoder_block_config": {
|
| 47 |
+
"dropout": 0.0,
|
| 48 |
+
"feature_size": 1024,
|
| 49 |
+
"head_dim": 64,
|
| 50 |
+
"num_heads": 16
|
| 51 |
+
},
|
| 52 |
+
"encoder_block_config": {
|
| 53 |
+
"feature_size": 1024,
|
| 54 |
+
"head_dim": 64,
|
| 55 |
+
"num_heads": 16
|
| 56 |
+
},
|
| 57 |
+
"num_blocks": 2,
|
| 58 |
+
"pos_embed_config": {
|
| 59 |
+
"embedding_dim": 1024,
|
| 60 |
+
"num_embeddings": 300
|
| 61 |
+
}
|
| 62 |
+
},
|
| 63 |
+
"transformers_version": "4.44.2"
|
| 64 |
}
|
prismatic_config.py
CHANGED
|
@@ -7,7 +7,7 @@ Default configuration specifies `siglip-224px+7b`.
|
|
| 7 |
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
import transformers
|
| 10 |
-
from transformers import PretrainedConfig
|
| 11 |
from transformers.models.auto import CONFIG_MAPPING
|
| 12 |
import numpy as np
|
| 13 |
|
|
@@ -155,7 +155,6 @@ class PrismaticConfig(PretrainedConfig):
|
|
| 155 |
|
| 156 |
# Here we need trajectory_vla config, with
|
| 157 |
# prismatic_config fields and then the waypointer fields
|
| 158 |
-
|
| 159 |
class TrajectoryVLAConfig(PretrainedConfig):
|
| 160 |
model_type: str = "trajectoryvla"
|
| 161 |
|
|
@@ -217,7 +216,8 @@ if __name__ == "__main__" :
|
|
| 217 |
|
| 218 |
# prismatic_config = PrismaticConfig()
|
| 219 |
# print(prismatic_config)
|
| 220 |
-
|
|
|
|
| 221 |
prismatic_config_dict = {
|
| 222 |
"vision_backbone_id":"dinosiglip-vit-so-224px",
|
| 223 |
"llm_backbone_id":"llama2-7b-pure",
|
|
@@ -280,30 +280,3 @@ if __name__ == "__main__" :
|
|
| 280 |
|
| 281 |
TrajectoryVLAConfig = TrajectoryVLAConfig( **TrajectoryVlaConfig_config)
|
| 282 |
print(TrajectoryVLAConfig)
|
| 283 |
-
|
| 284 |
-
# class WaypointTokenizer:
|
| 285 |
-
# """
|
| 286 |
-
# Wraps base LLM/VLM tokenizer and overloads least used token as a control token
|
| 287 |
-
|
| 288 |
-
# NOTE: By default, assumes a BPE-style tokenizer akin to the LlamaTokenizer,
|
| 289 |
-
# where *the least used tokens* appear at the end of the vocabulary!
|
| 290 |
-
|
| 291 |
-
# TODO: Adding new token vs overloading? When I call `tokenizer.add_token()` vocab stays the same
|
| 292 |
-
# """
|
| 293 |
-
|
| 294 |
-
# def __init__(self, tokenizer: transformers.PreTrainedTokenizerBase, num_tokens: int = 10) -> None:
|
| 295 |
-
# self.tokenizer = tokenizer
|
| 296 |
-
# self.num_tokens = num_tokens
|
| 297 |
-
|
| 298 |
-
# def __call__(self, *_) -> str:
|
| 299 |
-
# """Get the text token for control"""
|
| 300 |
-
# return self.tokenizer.decode(self.control_token_ids)
|
| 301 |
-
|
| 302 |
-
# @property
|
| 303 |
-
# def control_token_ids(self) -> np.ndarray:
|
| 304 |
-
# # Assumes we're overwriting the final tokens of the vocabulary (least used tokens)
|
| 305 |
-
# return np.arange(self.num_tokens) + int(self.tokenizer.vocab_size - self.num_tokens)
|
| 306 |
-
|
| 307 |
-
# @property
|
| 308 |
-
# def num_control_tokens(self) -> int:
|
| 309 |
-
# return self.num_tokens
|
|
|
|
| 7 |
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
import transformers
|
| 10 |
+
from transformers import PretrainedConfig,AutoModel, AutoConfig
|
| 11 |
from transformers.models.auto import CONFIG_MAPPING
|
| 12 |
import numpy as np
|
| 13 |
|
|
|
|
| 155 |
|
| 156 |
# Here we need trajectory_vla config, with
|
| 157 |
# prismatic_config fields and then the waypointer fields
|
|
|
|
| 158 |
class TrajectoryVLAConfig(PretrainedConfig):
|
| 159 |
model_type: str = "trajectoryvla"
|
| 160 |
|
|
|
|
| 216 |
|
| 217 |
# prismatic_config = PrismaticConfig()
|
| 218 |
# print(prismatic_config)
|
| 219 |
+
AutoConfig.register("prismatic",PrismaticConfig)
|
| 220 |
+
AutoConfig.register("trajectoryvla",TrajectoryVLAConfig)
|
| 221 |
prismatic_config_dict = {
|
| 222 |
"vision_backbone_id":"dinosiglip-vit-so-224px",
|
| 223 |
"llm_backbone_id":"llama2-7b-pure",
|
|
|
|
| 280 |
|
| 281 |
TrajectoryVLAConfig = TrajectoryVLAConfig( **TrajectoryVlaConfig_config)
|
| 282 |
print(TrajectoryVLAConfig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|