adora_oft_best2 / config.json
GvineQQ's picture
Upload OpenVLA grasp model checkpoint
f50690d verified
{
"norm_stats": {
"gen72_grasp_best2": {
"action": {
"mean": [
79.11602783203125,
94.15535736083984,
-80.4166259765625,
-93.67428588867188,
-3.3462841510772705,
17.82291603088379,
6.387060165405273,
78.9906997680664,
-82.27678680419922,
88.60293579101562,
82.36312103271484,
-95.4980239868164,
3.2222084999084473,
15.378264427185059,
1.2210661172866821,
80.26712799072266
],
"std": [
21.024213790893555,
5.583713531494141,
10.143651008605957,
2.8190293312072754,
6.1579060554504395,
12.958017349243164,
9.206350326538086,
38.70134735107422,
24.093891143798828,
10.70143985748291,
13.960134506225586,
3.1140949726104736,
12.975412368774414,
19.39619255065918,
11.416078567504883,
38.01255416870117
],
"max": [
112.63500213623047,
102.0,
-43.02000045776367,
-68.55000305175781,
20.434999465942383,
61.125,
46.845001220703125,
100.0,
-18.19499969482422,
102.0,
110.38999938964844,
-72.7699966430664,
71.93499755859375,
67.31999969482422,
42.23500061035156,
100.0
],
"min": [
31.375,
66.01000213623047,
-99.53500366210938,
-95.75499725341797,
-38.540000915527344,
-22.940000534057617,
-37.529998779296875,
0.0,
-115.30999755859375,
14.984999656677246,
18.020000457763672,
-97.47000122070312,
-22.719999313354492,
-23.639999389648438,
-41.84000015258789,
0.0
],
"q01": [
34.29199905395508,
78.30999755859375,
-93.43000030517578,
-95.44999694824219,
-20.040000915527344,
-3.5509999990463235,
-15.819999694824219,
0.0,
-109.94999694824219,
46.845001220703125,
35.15999984741211,
-97.29000091552734,
-15.1479998588562,
-12.739999771118164,
-30.791500282287597,
0.0
],
"q99": [
105.26450119018561,
102.0,
-49.8484996795654,
-80.68149948120117,
8.350000381469727,
47.290000915527344,
32.16999816894531,
100.0,
-30.818500328063934,
102.0,
100.80999755859375,
-81.88200302124021,
52.8650016784668,
55.61800079345706,
30.233500289916996,
100.0
],
"mask": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
},
"proprio": {
"mean": [
79.11497497558594,
94.18975830078125,
-80.38821411132812,
-93.6719970703125,
-3.3557398319244385,
17.77706527709961,
6.375845909118652,
78.99101257324219,
-82.22480773925781,
88.610107421875,
82.32734680175781,
-95.49593353271484,
3.2511658668518066,
15.379179000854492,
1.202197551727295,
80.26780700683594
],
"std": [
20.358299255371094,
5.266684055328369,
9.759184837341309,
2.6425156593322754,
5.872856140136719,
12.372116088867188,
8.679393768310547,
38.70150375366211,
23.474021911621094,
10.264312744140625,
13.466100692749023,
2.960315227508545,
12.600417137145996,
18.766708374023438,
11.020885467529297,
38.01284408569336
],
"max": [
108.94000244140625,
102.0199966430664,
-45.0099983215332,
-72.97000122070312,
14.890000343322754,
55.84000015258789,
46.47999954223633,
100.0,
-21.459999084472656,
102.01000213623047,
104.38999938964844,
-73.69999694824219,
62.630001068115234,
64.4000015258789,
40.790000915527344,
100.0
],
"min": [
32.54999923706055,
73.81999969482422,
-98.31999969482422,
-95.58000183105469,
-36.43000030517578,
-16.389999389648438,
-26.139999389648438,
0.0,
-111.0999984741211,
28.559999465942383,
19.8700008392334,
-97.37000274658203,
-16.200000762939453,
-18.959999084472656,
-39.83000183105469,
0.0
],
"q01": [
36.040000915527344,
79.12000274658203,
-92.80000305175781,
-95.37999725341797,
-19.156999778747558,
-0.11400000452995164,
-12.917000007629394,
0.0,
-109.78100128173828,
49.36499938964844,
36.233000946044925,
-97.2300033569336,
-14.706999969482421,
-12.258000183105468,
-30.653999710083006,
0.0
],
"q99": [
102.68300018310553,
102.01000213623047,
-51.226001358032214,
-81.30999755859375,
8.239999771118164,
45.500998687744165,
31.277000617980963,
100.0,
-31.829999351501392,
101.95999908447266,
100.08000183105469,
-82.35599975585936,
51.74799919128421,
53.9569995880127,
29.15399971008302,
100.0
]
},
"num_transitions": 16131,
"num_trajectories": 49
}
},
"n_action_bins": 256,
"vision_backbone_id": "dinosiglip-vit-so-224px",
"llm_backbone_id": "llama2-7b-pure",
"arch_specifier": "no-align+fused-gelu-mlp",
"output_projector_states": false,
"use_fused_vision_backbone": true,
"timm_model_ids": [
"vit_large_patch14_reg4_dinov2.lvd142m",
"vit_so400m_patch14_siglip_224"
],
"timm_override_act_layers": [
null,
null
],
"image_sizes": [
224,
224
],
"image_resize_strategy": "resize-naive",
"hf_llm_id": "meta-llama/Llama-2-7b-hf",
"llm_max_length": 2048,
"pad_token_id": 32000,
"pad_to_multiple_of": 64,
"text_config": {
"vocab_size": 32064,
"max_position_embeddings": 2048,
"hidden_size": 4096,
"intermediate_size": 11008,
"num_hidden_layers": 32,
"num_attention_heads": 32,
"num_key_value_heads": 32,
"hidden_act": "silu",
"initializer_range": 0.02,
"rms_norm_eps": 1e-06,
"pretraining_tp": 1,
"use_cache": true,
"rope_theta": 10000.0,
"rope_scaling": null,
"attention_bias": false,
"attention_dropout": 0.0,
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": false,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": null,
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": 1,
"pad_token_id": 32000,
"eos_token_id": 2,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "",
"model_type": "llama"
},
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": [
"OpenVLAForActionPrediction"
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"eos_token_id": null,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "/home/guangyu/.cache/huggingface/hub/models--openvla--openvla-7b/snapshots/31f090d05236101ebfc381b61c674dd4746d4ce0",
"transformers_version": "4.40.1",
"auto_map": {
"AutoConfig": "configuration_prismatic.OpenVLAConfig",
"AutoModelForVision2Seq": "modeling_prismatic.OpenVLAForActionPrediction"
},
"model_type": "openvla"
}