{
"norm_stats": {
"gen72_grasp_stacking_cloth": {
"action": {
"mean": [
62.43017578125,
87.67520904541016,
-81.64241790771484,
-79.35697174072266,
-5.861265659332275,
10.143360137939453,
-2.90120792388916,
72.50067138671875,
-77.66656494140625,
86.92049407958984,
84.2964096069336,
-83.40449523925781,
5.118907928466797,
3.7968454360961914,
5.422410488128662,
86.0960922241211
],
"std": [
26.70451545715332,
15.426373481750488,
25.905000686645508,
19.544811248779297,
22.8100643157959,
23.047954559326172,
19.49391746520996,
41.80753707885742,
29.825439453125,
14.745803833007812,
21.135257720947266,
22.255817413330078,
18.003395080566406,
18.94203758239746,
16.219724655151367,
32.87356948852539
],
"max": [
166.1999969482422,
102.0,
18.280000686645508,
52.0,
169.0,
92.24500274658203,
152.92999267578125,
100.0,
52.90999984741211,
102.0,
169.0,
52.0,
121.50499725341797,
76.7300033569336,
119.13500213623047,
100.0
],
"min": [
-16.790000915527344,
-35.904998779296875,
-169.0,
-98.3499984741211,
-105.63999938964844,
-86.56999969482422,
-108.1050033569336,
0.0,
-146.47000122070312,
-50.494998931884766,
-37.04499816894531,
-97.7300033569336,
-157.6750030517578,
-82.2699966430664,
-140.4949951171875,
0.0
],
"q01": [
11.6899995803833,
30.454999923706055,
-126.7388484954834,
-96.11000061035156,
-66.4000015258789,
-51.33000183105469,
-55.10885036468506,
0.0,
-109.25,
28.17919984817505,
18.235000610351562,
-97.12000274658203,
-25.17080009460449,
-51.85499954223633,
-48.34000015258789,
0.0
],
"q99": [
102.73999786376953,
102.0,
-13.369199647902697,
1.790799961090833,
47.369998931884766,
66.31500244140625,
48.95500183105469,
100.0,
-2.109999895095825,
102.0,
116.62999725341797,
10.723850355148409,
61.6588496017457,
51.77000045776367,
44.34000015258789,
100.0
],
"mask": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
},
"proprio": {
"mean": [
62.37788391113281,
87.68936157226562,
-81.61447143554688,
-79.36709594726562,
-5.8816938400268555,
10.13486385345459,
-2.892655849456787,
72.50101470947266,
-77.6031723022461,
86.94501495361328,
84.27079772949219,
-83.39244079589844,
5.130640983581543,
3.7632524967193604,
5.420851707458496,
86.09623718261719
],
"std": [
25.871257781982422,
14.867486953735352,
25.252859115600586,
18.814409255981445,
22.16670036315918,
21.30392837524414,
18.7672061920166,
41.807701110839844,
29.216785430908203,
14.115077018737793,
20.53180503845215,
21.84934425354004,
17.31168556213379,
17.874422073364258,
15.709982872009277,
32.87362289428711
],
"max": [
160.5500030517578,
102.02999877929688,
13.489999771118164,
52.02000045776367,
159.24000549316406,
89.77999877929688,
143.4199981689453,
100.0,
50.060001373291016,
102.08999633789062,
169.00999450683594,
52.029998779296875,
116.55999755859375,
69.1500015258789,
107.02999877929688,
100.0
],
"min": [
-14.3100004196167,
-33.93000030517578,
-169.00999450683594,
-98.05000305175781,
-101.91000366210938,
-81.33000183105469,
-100.11000061035156,
0.0,
-141.3000030517578,
-48.31999969482422,
-29.790000915527344,
-97.66999816894531,
-147.30999755859375,
-80.5999984741211,
-131.22000122070312,
0.0
],
"q01": [
12.260000228881836,
32.06999969482422,
-125.55999755859375,
-95.9000015258789,
-64.44999694824219,
-46.470001220703125,
-52.880001068115234,
0.0,
-108.62000274658203,
30.662299900054933,
19.889999389648438,
-97.11000061035156,
-23.90999984741211,
-48.310001373291016,
-46.52769916534424,
0.0
],
"q99": [
101.80999755859375,
102.01000213623047,
-15.149999618530273,
-0.8115000146618113,
45.9900016784668,
62.34000015258789,
47.220001220703125,
100.0,
-3.6346001100536434,
102.01000213623047,
115.7300033569336,
9.609999656677246,
59.17769981384296,
48.88999938964844,
42.459999084472656,
100.0
]
},
"num_transitions": 296424,
"num_trajectories": 498
}
},
"n_action_bins": 256,
"vision_backbone_id": "dinosiglip-vit-so-224px",
"llm_backbone_id": "llama2-7b-pure",
"arch_specifier": "no-align+fused-gelu-mlp",
"output_projector_states": false,
"use_fused_vision_backbone": true,
"timm_model_ids": [
"vit_large_patch14_reg4_dinov2.lvd142m",
"vit_so400m_patch14_siglip_224"
],
"timm_override_act_layers": [
null,
null
],
"image_sizes": [
224,
224
],
"image_resize_strategy": "resize-naive",
"hf_llm_id": "meta-llama/Llama-2-7b-hf",
"llm_max_length": 2048,
"pad_token_id": 32000,
"pad_to_multiple_of": 64,
"text_config": {
"vocab_size": 32064,
"max_position_embeddings": 2048,
"hidden_size": 4096,
"intermediate_size": 11008,
"num_hidden_layers": 32,
"num_attention_heads": 32,
"num_key_value_heads": 32,
"hidden_act": "silu",
"initializer_range": 0.02,
"rms_norm_eps": 1e-06,
"pretraining_tp": 1,
"use_cache": true,
"rope_theta": 10000.0,
"rope_scaling": null,
"attention_bias": false,
"attention_dropout": 0.0,
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": false,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": null,
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": 1,
"pad_token_id": 32000,
"eos_token_id": 2,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "",
"model_type": "llama"
},
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": [
"OpenVLAForActionPrediction"
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"eos_token_id": null,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "openvla/openvla-7b",
"transformers_version": "4.40.1",
"auto_map": {
"AutoConfig": "openvla/openvla-7b--configuration_prismatic.OpenVLAConfig",
"AutoModelForVision2Seq": "openvla/openvla-7b--modeling_prismatic.OpenVLAForActionPrediction"
},
"model_type": "openvla"
}