{
"norm_stats": {
"gen72_grasp_stacking_baskets": {
"action": {
"mean": [
71.42466735839844,
84.38833618164062,
-82.9383316040039,
-85.52547454833984,
-4.3938679695129395,
19.08216094970703,
0.308868408203125,
67.15239715576172,
-80.31741333007812,
84.10924530029297,
86.23428344726562,
-86.4471206665039,
3.9559240341186523,
9.594415664672852,
4.726510524749756,
82.21428680419922
],
"std": [
25.26982879638672,
15.808586120605469,
20.259897232055664,
21.52882957458496,
20.819337844848633,
20.531246185302734,
26.965940475463867,
45.217315673828125,
27.104999542236328,
12.850805282592773,
14.046908378601074,
17.79497718811035,
13.857259750366211,
17.137197494506836,
17.71614646911621,
36.83033752441406
],
"max": [
124.62999725341797,
102.0,
12.920000076293945,
52.0,
119.18000030517578,
89.77999877929688,
169.0,
100.0,
3.9600000381469727,
102.0,
123.30999755859375,
52.0,
73.56500244140625,
66.08999633789062,
86.5250015258789,
100.0
],
"min": [
5.84499979019165,
-38.275001525878906,
-169.0,
-97.47000122070312,
-113.19999694824219,
-65.30000305175781,
-139.75,
0.0,
-121.68499755859375,
5.84499979019165,
-8.654999732971191,
-98.16999816894531,
-55.459999084472656,
-78.44499969482422,
-92.29000091552734,
0.0
],
"q01": [
11.34000015258789,
35.595001220703125,
-127.60875129699707,
-96.5,
-68.33499908447266,
-37.86875057220459,
-82.83499908447266,
0.0,
-109.64374732971191,
33.56999969482422,
33.582499504089355,
-96.94000244140625,
-22.323750495910645,
-32.78499984741211,
-46.05500030517578,
0.0
],
"q99": [
103.84500122070312,
102.0,
-23.510000228881836,
24.047500133514404,
58.91999912261963,
69.83000183105469,
63.060001373291016,
100.0,
-10.866249561309814,
102.0,
108.41500091552734,
2.14000004529953,
54.625,
51.900001525878906,
61.040000915527344,
100.0
],
"mask": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
},
"proprio": {
"mean": [
71.39665222167969,
84.42801666259766,
-82.94181823730469,
-85.53479766845703,
-4.377756118774414,
19.067413330078125,
0.31575945019721985,
67.15303802490234,
-80.27192687988281,
84.11940002441406,
86.19857788085938,
-86.43647766113281,
3.944101572036743,
9.555686950683594,
4.720884323120117,
82.21434783935547
],
"std": [
24.659215927124023,
15.453649520874023,
19.847105026245117,
21.23726463317871,
20.293472290039062,
19.495216369628906,
26.316822052001953,
45.21790313720703,
26.583589553833008,
12.41045093536377,
13.648248672485352,
17.389333724975586,
13.373165130615234,
16.093711853027344,
17.135786056518555,
36.83036804199219
],
"max": [
124.08000183105469,
102.05000305175781,
10.850000381469727,
52.0099983215332,
109.37000274658203,
87.58000183105469,
168.66000366210938,
100.0,
3.8399999141693115,
102.01000213623047,
119.8499984741211,
41.31999969482422,
68.77999877929688,
65.19999694824219,
82.41999816894531,
100.0
],
"min": [
6.940000057220459,
-35.029998779296875,
-169.00999450683594,
-97.37999725341797,
-111.45999908447266,
-62.58000183105469,
-136.9199981689453,
0.0,
-120.04000091552734,
10.140000343322754,
-3.2699999809265137,
-97.66000366210938,
-54.2599983215332,
-64.5,
-89.81999969482422,
0.0
],
"q01": [
11.512500286102295,
36.560001373291016,
-126.5099983215332,
-96.41000366210938,
-66.58749771118164,
-35.62750053405762,
-81.84500122070312,
0.0,
-109.31999969482422,
37.10499858856201,
34.77500057220459,
-96.91999816894531,
-21.260000228881836,
-29.440000534057617,
-45.08750057220459,
0.0
],
"q99": [
102.95499992370605,
102.01000213623047,
-25.047500610351562,
23.78499937057495,
56.52000045776367,
67.02999877929688,
61.95750045776367,
100.0,
-11.315000295639038,
102.01000213623047,
107.51000213623047,
0.6450000107288361,
53.470001220703125,
49.709999084472656,
59.13999938964844,
100.0
]
},
"num_transitions": 74826,
"num_trajectories": 200
}
},
"n_action_bins": 256,
"vision_backbone_id": "dinosiglip-vit-so-224px",
"llm_backbone_id": "llama2-7b-pure",
"arch_specifier": "no-align+fused-gelu-mlp",
"output_projector_states": false,
"use_fused_vision_backbone": true,
"timm_model_ids": [
"vit_large_patch14_reg4_dinov2.lvd142m",
"vit_so400m_patch14_siglip_224"
],
"timm_override_act_layers": [
null,
null
],
"image_sizes": [
224,
224
],
"image_resize_strategy": "resize-naive",
"hf_llm_id": "meta-llama/Llama-2-7b-hf",
"llm_max_length": 2048,
"pad_token_id": 32000,
"pad_to_multiple_of": 64,
"text_config": {
"vocab_size": 32064,
"max_position_embeddings": 2048,
"hidden_size": 4096,
"intermediate_size": 11008,
"num_hidden_layers": 32,
"num_attention_heads": 32,
"num_key_value_heads": 32,
"hidden_act": "silu",
"initializer_range": 0.02,
"rms_norm_eps": 1e-06,
"pretraining_tp": 1,
"use_cache": true,
"rope_theta": 10000.0,
"rope_scaling": null,
"attention_bias": false,
"attention_dropout": 0.0,
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": false,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": null,
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": 1,
"pad_token_id": 32000,
"eos_token_id": 2,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "",
"model_type": "llama"
},
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": [
"OpenVLAForActionPrediction"
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"eos_token_id": null,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "/home/guangyu/.cache/huggingface/hub/models--openvla--openvla-7b/snapshots/31f090d05236101ebfc381b61c674dd4746d4ce0",
"transformers_version": "4.40.1",
"auto_map": {
"AutoConfig": "configuration_prismatic.OpenVLAConfig",
"AutoModelForVision2Seq": "modeling_prismatic.OpenVLAForActionPrediction"
},
"model_type": "openvla"
}