{
"norm_stats": {
"gen72_grasp_stacking_cloth": {
"action": {
"mean": [
62.43017578125,
87.67520904541016,
-81.64241790771484,
-79.35697174072266,
-5.861265659332275,
10.143360137939453,
-2.90120792388916,
72.50067138671875,
-77.66656494140625,
86.92049407958984,
84.2964096069336,
-83.40449523925781,
5.118907928466797,
3.7968454360961914,
5.422410488128662,
86.0960922241211
],
"std": [
26.70451545715332,
15.426373481750488,
25.905000686645508,
19.544811248779297,
22.8100643157959,
23.047954559326172,
19.49391746520996,
41.80753707885742,
29.825439453125,
14.745803833007812,
21.135257720947266,
22.255817413330078,
18.003395080566406,
18.94203758239746,
16.219724655151367,
32.87356948852539
],
"max": [
166.1999969482422,
102.0,
18.280000686645508,
52.0,
169.0,
92.24500274658203,
152.92999267578125,
100.0,
52.90999984741211,
102.0,
169.0,
52.0,
121.50499725341797,
76.7300033569336,
119.13500213623047,
100.0
],
"min": [
-16.790000915527344,
-35.904998779296875,
-169.0,
-98.3499984741211,
-105.63999938964844,
-86.56999969482422,
-108.1050033569336,
0.0,
-146.47000122070312,
-50.494998931884766,
-37.04499816894531,
-97.7300033569336,
-157.6750030517578,
-82.2699966430664,
-140.4949951171875,
0.0
],
"q01": [
11.6899995803833,
30.454999923706055,
-126.7388484954834,
-96.11000061035156,
-66.4000015258789,
-51.33000183105469,
-55.10885036468506,
0.0,
-109.25,
28.17919984817505,
18.235000610351562,
-97.12000274658203,
-25.17080009460449,
-51.85499954223633,
-48.34000015258789,
0.0
],
"q99": [
102.73999786376953,
102.0,
-13.369199647902697,
1.790799961090833,
47.369998931884766,
66.31500244140625,
48.95500183105469,
100.0,
-2.109999895095825,
102.0,
116.62999725341797,
10.723850355148409,
61.6588496017457,
51.77000045776367,
44.34000015258789,
100.0
],
"mask": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
},
"proprio": {
"mean": [
62.37788391113281,
87.68936157226562,
-81.61447143554688,
-79.36709594726562,
-5.8816938400268555,
10.13486385345459,
-2.892655849456787,
72.50101470947266,
-77.6031723022461,
86.94501495361328,
84.27079772949219,
-83.39244079589844,
5.130640983581543,
3.7632524967193604,
5.420851707458496,
86.09623718261719
],
"std": [
25.871257781982422,
14.867486953735352,
25.252859115600586,
18.814409255981445,
22.16670036315918,
21.30392837524414,
18.7672061920166,
41.807701110839844,
29.216785430908203,
14.115077018737793,
20.53180503845215,
21.84934425354004,
17.31168556213379,
17.874422073364258,
15.709982872009277,
32.87362289428711
],
"max": [
160.5500030517578,
102.02999877929688,
13.489999771118164,
52.02000045776367,
159.24000549316406,
89.77999877929688,
143.4199981689453,
100.0,
50.060001373291016,
102.08999633789062,
169.00999450683594,
52.029998779296875,
116.55999755859375,
69.1500015258789,
107.02999877929688,
100.0
],
"min": [
-14.3100004196167,
-33.93000030517578,
-169.00999450683594,
-98.05000305175781,
-101.91000366210938,
-81.33000183105469,
-100.11000061035156,
0.0,
-141.3000030517578,
-48.31999969482422,
-29.790000915527344,
-97.66999816894531,
-147.30999755859375,
-80.5999984741211,
-131.22000122070312,
0.0
],
"q01": [
12.260000228881836,
32.06999969482422,
-125.55999755859375,
-95.9000015258789,
-64.44999694824219,
-46.470001220703125,
-52.880001068115234,
0.0,
-108.62000274658203,
30.662299900054933,
19.889999389648438,
-97.11000061035156,
-23.90999984741211,
-48.310001373291016,
-46.52769916534424,
0.0
],
"q99": [
101.80999755859375,
102.01000213623047,
-15.149999618530273,
-0.8115000146618113,
45.9900016784668,
62.34000015258789,
47.220001220703125,
100.0,
-3.6346001100536434,
102.01000213623047,
115.7300033569336,
9.609999656677246,
59.17769981384296,
48.88999938964844,
42.459999084472656,
100.0
]
},
"num_transitions": 296424,
"num_trajectories": 498
}
},
"n_action_bins": 256,
"vision_backbone_id": "dinosiglip-vit-so-224px",
"llm_backbone_id": "llama2-7b-pure",
"arch_specifier": "no-align+fused-gelu-mlp",
"output_projector_states": false,
"use_fused_vision_backbone": true,
"timm_model_ids": [
"vit_large_patch14_reg4_dinov2.lvd142m",
"vit_so400m_patch14_siglip_224"
],
"timm_override_act_layers": [
null,
null
],
"image_sizes": [
224,
224
],
"image_resize_strategy": "resize-naive",
"hf_llm_id": "meta-llama/Llama-2-7b-hf",
"llm_max_length": 2048,
"pad_token_id": 32000,
"pad_to_multiple_of": 64,
"text_config": {
"vocab_size": 32064,
"max_position_embeddings": 2048,
"hidden_size": 4096,
"intermediate_size": 11008,
"num_hidden_layers": 32,
"num_attention_heads": 32,
"num_key_value_heads": 32,
"hidden_act": "silu",
"initializer_range": 0.02,
"rms_norm_eps": 1e-06,
"pretraining_tp": 1,
"use_cache": true,
"rope_theta": 10000.0,
"rope_scaling": null,
"attention_bias": false,
"attention_dropout": 0.0,
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": false,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": null,
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": 1,
"pad_token_id": 32000,
"eos_token_id": 2,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "",
"model_type": "llama"
},
"return_dict": true,
"output_hidden_states": false,
"output_attentions": false,
"torchscript": false,
"torch_dtype": "bfloat16",
"use_bfloat16": false,
"tf_legacy_loss": false,
"pruned_heads": {},
"tie_word_embeddings": true,
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"is_decoder": false,
"cross_attention_hidden_size": null,
"add_cross_attention": false,
"tie_encoder_decoder": false,
"max_length": 20,
"min_length": 0,
"do_sample": false,
"early_stopping": false,
"num_beams": 1,
"num_beam_groups": 1,
"diversity_penalty": 0.0,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"repetition_penalty": 1.0,
"length_penalty": 1.0,
"no_repeat_ngram_size": 0,
"encoder_no_repeat_ngram_size": 0,
"bad_words_ids": null,
"num_return_sequences": 1,
"output_scores": false,
"return_dict_in_generate": false,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"remove_invalid_values": false,
"exponential_decay_length_penalty": null,
"suppress_tokens": null,
"begin_suppress_tokens": null,
"architectures": [
"OpenVLAForActionPrediction"
],
"finetuning_task": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"tokenizer_class": null,
"prefix": null,
"bos_token_id": null,
"eos_token_id": null,
"sep_token_id": null,
"decoder_start_token_id": null,
"task_specific_params": null,
"problem_type": null,
"_name_or_path": "openvla/openvla-7b",
"transformers_version": "4.40.1",
"auto_map": {
"AutoConfig": "openvla/openvla-7b--configuration_prismatic.OpenVLAConfig",
"AutoModelForVision2Seq": "openvla/openvla-7b--modeling_prismatic.OpenVLAForActionPrediction"
},
"model_type": "openvla"
}