Add files using upload-large-folder tool
Browse files- 1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/config.json +315 -0
- 1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/dataset_statistics.json +634 -0
- 1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/model.safetensors +3 -0
- 1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/singlevla_config/config.json +227 -0
- 1e-4/twinvla-scratch-1e-4-aloha_handover_box/config.json +314 -0
- 1e-4/twinvla-scratch-1e-4-aloha_handover_box/dataset_statistics.json +634 -0
- 1e-4/twinvla-scratch-1e-4-aloha_handover_box/model.safetensors +3 -0
- 1e-4/twinvla-scratch-1e-4-aloha_handover_box/singlevla_config/config.json +227 -0
- 1e-4/twinvla-scratch-1e-4-aloha_lift_box/config.json +314 -0
- 1e-4/twinvla-scratch-1e-4-aloha_lift_box/dataset_statistics.json +634 -0
- 1e-4/twinvla-scratch-1e-4-aloha_lift_box/model.safetensors +3 -0
- 1e-4/twinvla-scratch-1e-4-aloha_lift_box/singlevla_config/config.json +227 -0
- 1e-4/twinvla-scratch-1e-4-aloha_shoes_table/config.json +314 -0
- 1e-4/twinvla-scratch-1e-4-aloha_shoes_table/dataset_statistics.json +634 -0
- 1e-4/twinvla-scratch-1e-4-aloha_shoes_table/model.safetensors +3 -0
- 1e-4/twinvla-scratch-1e-4-aloha_shoes_table/singlevla_config/config.json +227 -0
- 2e-5/twinvla-aloha_shoes_table/config.json +317 -0
- 2e-5/twinvla-aloha_shoes_table/dataset_statistics.json +634 -0
- 2e-5/twinvla-aloha_shoes_table/model.safetensors +3 -0
- 2e-5/twinvla-aloha_shoes_table/singlevla_config/config.json +230 -0
- 2e-5/twinvla-scratch-aloha_dish_drainer/config.json +314 -0
- 2e-5/twinvla-scratch-aloha_dish_drainer/dataset_statistics.json +634 -0
- 2e-5/twinvla-scratch-aloha_dish_drainer/model.safetensors +3 -0
- 2e-5/twinvla-scratch-aloha_dish_drainer/singlevla_config/config.json +227 -0
- 2e-5/twinvla-scratch-aloha_handover_box/config.json +314 -0
- 2e-5/twinvla-scratch-aloha_handover_box/dataset_statistics.json +634 -0
- 2e-5/twinvla-scratch-aloha_handover_box/model.safetensors +3 -0
- 2e-5/twinvla-scratch-aloha_handover_box/singlevla_config/config.json +227 -0
- 2e-5/twinvla-scratch-aloha_handover_box/training_states.pth +3 -0
- 2e-5/twinvla-scratch-aloha_lift_box/config.json +314 -0
- 2e-5/twinvla-scratch-aloha_lift_box/dataset_statistics.json +634 -0
- 2e-5/twinvla-scratch-aloha_lift_box/model.safetensors +3 -0
- 2e-5/twinvla-scratch-aloha_lift_box/singlevla_config/config.json +227 -0
- 2e-5/twinvla-scratch-aloha_lift_box/training_states.pth +3 -0
1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/config.json
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/tabletop-v3/twinvla-scratch-1e-4-aloha_dish_drainer",
|
| 3 |
+
"action_dim": 10,
|
| 4 |
+
"action_head": "DiT",
|
| 5 |
+
"action_len": 20,
|
| 6 |
+
"architectures": [
|
| 7 |
+
"Eagle2_1BTwinVLA"
|
| 8 |
+
],
|
| 9 |
+
"attn_reweighting": true,
|
| 10 |
+
"denoiser": "FM",
|
| 11 |
+
"dit_scratch": false,
|
| 12 |
+
"global_normalization": true,
|
| 13 |
+
"hz_interpolate": null,
|
| 14 |
+
"interpolate_gripper": false,
|
| 15 |
+
"knowledge_insulation": false,
|
| 16 |
+
"model_path": null,
|
| 17 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 18 |
+
"modeling": "denoising",
|
| 19 |
+
"normalization": "quantile",
|
| 20 |
+
"num_readouts": 1,
|
| 21 |
+
"readout_token_as_eos": true,
|
| 22 |
+
"share_decoder": true,
|
| 23 |
+
"share_embed_tokens": true,
|
| 24 |
+
"share_vision": true,
|
| 25 |
+
"singlevla_config": {
|
| 26 |
+
"_attn_implementation_autoset": false,
|
| 27 |
+
"_attn_implementation_internal": null,
|
| 28 |
+
"_commit_hash": null,
|
| 29 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/tabletop-v3/twinvla-scratch-1e-4-aloha_dish_drainer/singlevla_config",
|
| 30 |
+
"action_dim": 10,
|
| 31 |
+
"action_head": "DiT",
|
| 32 |
+
"action_head_hidden_dim": 1024,
|
| 33 |
+
"action_len": 20,
|
| 34 |
+
"add_cross_attention": false,
|
| 35 |
+
"aggregation": "None",
|
| 36 |
+
"architectures": [
|
| 37 |
+
"Eagle2_1BVLA"
|
| 38 |
+
],
|
| 39 |
+
"auto_map": {},
|
| 40 |
+
"bad_words_ids": null,
|
| 41 |
+
"begin_suppress_tokens": null,
|
| 42 |
+
"bos_token_id": null,
|
| 43 |
+
"chunk_size_feed_forward": 0,
|
| 44 |
+
"cross_attention_hidden_size": null,
|
| 45 |
+
"decoder_start_token_id": null,
|
| 46 |
+
"denoiser": "FM",
|
| 47 |
+
"diffusion_batch": 32,
|
| 48 |
+
"dit_size": "DiT-B",
|
| 49 |
+
"diversity_penalty": 0.0,
|
| 50 |
+
"do_sample": false,
|
| 51 |
+
"downsample_ratio": 0.5,
|
| 52 |
+
"dynamic_image_size": true,
|
| 53 |
+
"early_stopping": false,
|
| 54 |
+
"efficient_loss": true,
|
| 55 |
+
"enable_cfg": true,
|
| 56 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 57 |
+
"eos_token_id": null,
|
| 58 |
+
"exponential_decay_length_penalty": null,
|
| 59 |
+
"finetuning_task": null,
|
| 60 |
+
"force_image_size": 448,
|
| 61 |
+
"forced_bos_token_id": null,
|
| 62 |
+
"forced_eos_token_id": null,
|
| 63 |
+
"global_normalization": true,
|
| 64 |
+
"id2label": {
|
| 65 |
+
"0": "LABEL_0",
|
| 66 |
+
"1": "LABEL_1"
|
| 67 |
+
},
|
| 68 |
+
"image_size": 448,
|
| 69 |
+
"is_decoder": false,
|
| 70 |
+
"is_encoder_decoder": false,
|
| 71 |
+
"keep_aspect_ratio": false,
|
| 72 |
+
"knowledge_insulation": false,
|
| 73 |
+
"label2id": {
|
| 74 |
+
"LABEL_0": 0,
|
| 75 |
+
"LABEL_1": 1
|
| 76 |
+
},
|
| 77 |
+
"length_penalty": 1.0,
|
| 78 |
+
"llm_config": {
|
| 79 |
+
"_attn_implementation_autoset": true,
|
| 80 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 81 |
+
"add_cross_attention": false,
|
| 82 |
+
"architectures": [
|
| 83 |
+
"Qwen2ForCausalLM"
|
| 84 |
+
],
|
| 85 |
+
"attention_dropout": 0.0,
|
| 86 |
+
"auto_map": {
|
| 87 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 88 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 89 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 90 |
+
},
|
| 91 |
+
"bad_words_ids": null,
|
| 92 |
+
"begin_suppress_tokens": null,
|
| 93 |
+
"bos_token_id": 151643,
|
| 94 |
+
"chunk_size_feed_forward": 0,
|
| 95 |
+
"cross_attention_hidden_size": null,
|
| 96 |
+
"decoder_start_token_id": null,
|
| 97 |
+
"diversity_penalty": 0.0,
|
| 98 |
+
"do_sample": false,
|
| 99 |
+
"early_stopping": false,
|
| 100 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 101 |
+
"eos_token_id": 151645,
|
| 102 |
+
"exponential_decay_length_penalty": null,
|
| 103 |
+
"finetuning_task": null,
|
| 104 |
+
"forced_bos_token_id": null,
|
| 105 |
+
"forced_eos_token_id": null,
|
| 106 |
+
"hidden_act": "silu",
|
| 107 |
+
"hidden_size": 896,
|
| 108 |
+
"id2label": {
|
| 109 |
+
"0": "LABEL_0",
|
| 110 |
+
"1": "LABEL_1"
|
| 111 |
+
},
|
| 112 |
+
"initializer_range": 0.02,
|
| 113 |
+
"intermediate_size": 4864,
|
| 114 |
+
"is_decoder": false,
|
| 115 |
+
"is_encoder_decoder": false,
|
| 116 |
+
"label2id": {
|
| 117 |
+
"LABEL_0": 0,
|
| 118 |
+
"LABEL_1": 1
|
| 119 |
+
},
|
| 120 |
+
"length_penalty": 1.0,
|
| 121 |
+
"max_length": 20,
|
| 122 |
+
"max_position_embeddings": 32768,
|
| 123 |
+
"max_window_layers": 21,
|
| 124 |
+
"min_length": 0,
|
| 125 |
+
"model_type": "qwen2",
|
| 126 |
+
"no_repeat_ngram_size": 0,
|
| 127 |
+
"num_attention_heads": 14,
|
| 128 |
+
"num_beam_groups": 1,
|
| 129 |
+
"num_beams": 1,
|
| 130 |
+
"num_hidden_layers": 24,
|
| 131 |
+
"num_key_value_heads": 2,
|
| 132 |
+
"num_return_sequences": 1,
|
| 133 |
+
"output_attentions": false,
|
| 134 |
+
"output_hidden_states": false,
|
| 135 |
+
"output_scores": false,
|
| 136 |
+
"pad_token_id": null,
|
| 137 |
+
"prefix": null,
|
| 138 |
+
"problem_type": null,
|
| 139 |
+
"pruned_heads": {},
|
| 140 |
+
"remove_invalid_values": false,
|
| 141 |
+
"repetition_penalty": 1.0,
|
| 142 |
+
"return_dict": true,
|
| 143 |
+
"return_dict_in_generate": false,
|
| 144 |
+
"rms_norm_eps": 1e-06,
|
| 145 |
+
"rope_scaling": null,
|
| 146 |
+
"rope_theta": 1000000.0,
|
| 147 |
+
"sep_token_id": null,
|
| 148 |
+
"sliding_window": 32768,
|
| 149 |
+
"suppress_tokens": null,
|
| 150 |
+
"task_specific_params": null,
|
| 151 |
+
"temperature": 1.0,
|
| 152 |
+
"tf_legacy_loss": false,
|
| 153 |
+
"tie_encoder_decoder": false,
|
| 154 |
+
"tie_word_embeddings": true,
|
| 155 |
+
"tokenizer_class": null,
|
| 156 |
+
"top_k": 50,
|
| 157 |
+
"top_p": 1.0,
|
| 158 |
+
"torch_dtype": "bfloat16",
|
| 159 |
+
"torchscript": false,
|
| 160 |
+
"transformers_version": "4.50.0.dev0",
|
| 161 |
+
"typical_p": 1.0,
|
| 162 |
+
"use_bfloat16": false,
|
| 163 |
+
"use_cache": false,
|
| 164 |
+
"use_sliding_window": false,
|
| 165 |
+
"vocab_size": 151674
|
| 166 |
+
},
|
| 167 |
+
"loss_version": "v4",
|
| 168 |
+
"max_dynamic_patch": 12,
|
| 169 |
+
"max_length": 20,
|
| 170 |
+
"min_dynamic_patch": 1,
|
| 171 |
+
"min_length": 0,
|
| 172 |
+
"mlp_checkpoint": true,
|
| 173 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 174 |
+
"model_type": "Eagle2_1BVLA",
|
| 175 |
+
"modeling": "denoising",
|
| 176 |
+
"no_repeat_ngram_size": 0,
|
| 177 |
+
"normalization": "quantile",
|
| 178 |
+
"num_beam_groups": 1,
|
| 179 |
+
"num_beams": 1,
|
| 180 |
+
"num_readouts": 1,
|
| 181 |
+
"num_return_sequences": 1,
|
| 182 |
+
"output_attentions": false,
|
| 183 |
+
"output_hidden_states": false,
|
| 184 |
+
"output_scores": false,
|
| 185 |
+
"pad2square": false,
|
| 186 |
+
"pad_token_id": null,
|
| 187 |
+
"pre_feature_reduction": false,
|
| 188 |
+
"prefix": null,
|
| 189 |
+
"problem_type": null,
|
| 190 |
+
"pruned_heads": {},
|
| 191 |
+
"ps_version": "v2",
|
| 192 |
+
"readout_token_as_eos": true,
|
| 193 |
+
"remove_invalid_values": false,
|
| 194 |
+
"repetition_penalty": 1.0,
|
| 195 |
+
"return_dict": true,
|
| 196 |
+
"return_dict_in_generate": false,
|
| 197 |
+
"return_text": null,
|
| 198 |
+
"select_layer": -1,
|
| 199 |
+
"sep_token_id": null,
|
| 200 |
+
"state_dim": 10,
|
| 201 |
+
"stopping_token": "|",
|
| 202 |
+
"suppress_tokens": null,
|
| 203 |
+
"task_specific_params": null,
|
| 204 |
+
"temperature": 1.0,
|
| 205 |
+
"template": "qwen2-chat",
|
| 206 |
+
"test_denoising_steps": 10,
|
| 207 |
+
"tf_legacy_loss": false,
|
| 208 |
+
"tie_encoder_decoder": false,
|
| 209 |
+
"tie_word_embeddings": true,
|
| 210 |
+
"tokenizer_class": null,
|
| 211 |
+
"top_k": 50,
|
| 212 |
+
"top_p": 1.0,
|
| 213 |
+
"torch_dtype": "bfloat16",
|
| 214 |
+
"torchscript": false,
|
| 215 |
+
"train_denoising_steps": 100,
|
| 216 |
+
"typical_p": 1.0,
|
| 217 |
+
"use_backbone_lora": 0,
|
| 218 |
+
"use_bfloat16": false,
|
| 219 |
+
"use_llm_lora": 0,
|
| 220 |
+
"use_thumbnail": true,
|
| 221 |
+
"vision_config": {
|
| 222 |
+
"_attn_implementation_autoset": true,
|
| 223 |
+
"_name_or_path": "",
|
| 224 |
+
"add_cross_attention": false,
|
| 225 |
+
"architectures": [
|
| 226 |
+
"SiglipVisionModel"
|
| 227 |
+
],
|
| 228 |
+
"attention_dropout": 0.0,
|
| 229 |
+
"auto_map": {
|
| 230 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 231 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 232 |
+
},
|
| 233 |
+
"bad_words_ids": null,
|
| 234 |
+
"begin_suppress_tokens": null,
|
| 235 |
+
"bos_token_id": null,
|
| 236 |
+
"chunk_size_feed_forward": 0,
|
| 237 |
+
"cross_attention_hidden_size": null,
|
| 238 |
+
"decoder_start_token_id": null,
|
| 239 |
+
"diversity_penalty": 0.0,
|
| 240 |
+
"do_sample": false,
|
| 241 |
+
"drop_path_rate": 0.1,
|
| 242 |
+
"early_stopping": false,
|
| 243 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 244 |
+
"eos_token_id": null,
|
| 245 |
+
"exponential_decay_length_penalty": null,
|
| 246 |
+
"finetuning_task": null,
|
| 247 |
+
"forced_bos_token_id": null,
|
| 248 |
+
"forced_eos_token_id": null,
|
| 249 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 250 |
+
"hidden_size": 1152,
|
| 251 |
+
"id2label": {
|
| 252 |
+
"0": "LABEL_0",
|
| 253 |
+
"1": "LABEL_1"
|
| 254 |
+
},
|
| 255 |
+
"image_size": 448,
|
| 256 |
+
"intermediate_size": 4304,
|
| 257 |
+
"is_decoder": false,
|
| 258 |
+
"is_encoder_decoder": false,
|
| 259 |
+
"label2id": {
|
| 260 |
+
"LABEL_0": 0,
|
| 261 |
+
"LABEL_1": 1
|
| 262 |
+
},
|
| 263 |
+
"layer_norm_eps": 1e-06,
|
| 264 |
+
"length_penalty": 1.0,
|
| 265 |
+
"max_length": 20,
|
| 266 |
+
"min_length": 0,
|
| 267 |
+
"model_type": "siglip_vision_model",
|
| 268 |
+
"no_repeat_ngram_size": 0,
|
| 269 |
+
"num_attention_heads": 16,
|
| 270 |
+
"num_beam_groups": 1,
|
| 271 |
+
"num_beams": 1,
|
| 272 |
+
"num_channels": 3,
|
| 273 |
+
"num_hidden_layers": 27,
|
| 274 |
+
"num_image_tokens": 1024,
|
| 275 |
+
"num_return_sequences": 1,
|
| 276 |
+
"output_attentions": false,
|
| 277 |
+
"output_hidden_states": false,
|
| 278 |
+
"output_scores": false,
|
| 279 |
+
"pad_token_id": null,
|
| 280 |
+
"patch_size": 14,
|
| 281 |
+
"prefix": null,
|
| 282 |
+
"problem_type": null,
|
| 283 |
+
"projection_dim": 2048,
|
| 284 |
+
"projector_hidden_act": "gelu_fast",
|
| 285 |
+
"pruned_heads": {},
|
| 286 |
+
"remove_invalid_values": false,
|
| 287 |
+
"repetition_penalty": 1.0,
|
| 288 |
+
"return_dict": true,
|
| 289 |
+
"return_dict_in_generate": false,
|
| 290 |
+
"sep_token_id": null,
|
| 291 |
+
"suppress_tokens": null,
|
| 292 |
+
"task_specific_params": null,
|
| 293 |
+
"temperature": 1.0,
|
| 294 |
+
"tf_legacy_loss": false,
|
| 295 |
+
"tie_encoder_decoder": false,
|
| 296 |
+
"tie_word_embeddings": true,
|
| 297 |
+
"tokenizer_class": null,
|
| 298 |
+
"top_k": 50,
|
| 299 |
+
"top_p": 1.0,
|
| 300 |
+
"torch_dtype": "bfloat16",
|
| 301 |
+
"torchscript": false,
|
| 302 |
+
"transformers_version": "4.50.0.dev0",
|
| 303 |
+
"typical_p": 1.0,
|
| 304 |
+
"use_bfloat16": false,
|
| 305 |
+
"vision_use_head": false
|
| 306 |
+
},
|
| 307 |
+
"vocab_size": 151674,
|
| 308 |
+
"vocab_start": null
|
| 309 |
+
},
|
| 310 |
+
"singlevla_config_path": "/scratch2/jellyho/rebuttal/tabletop-v3/twinvla-scratch-1e-4-aloha_dish_drainer/singlevla_config",
|
| 311 |
+
"singlevla_pretrained_path": null,
|
| 312 |
+
"state_dim": 10,
|
| 313 |
+
"torch_dtype": "bfloat16",
|
| 314 |
+
"transformers_version": "4.50.0.dev0"
|
| 315 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_dish_drainer": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.40166154503822327,
|
| 6 |
+
-0.01112140342593193,
|
| 7 |
+
0.1838103085756302,
|
| 8 |
+
0.7193062901496887,
|
| 9 |
+
0.2537333369255066,
|
| 10 |
+
-0.5631371736526489,
|
| 11 |
+
-0.14408059418201447,
|
| 12 |
+
0.8170215487480164,
|
| 13 |
+
0.1759030818939209,
|
| 14 |
+
0.3052484393119812,
|
| 15 |
+
0.27496495842933655,
|
| 16 |
+
0.07536163926124573,
|
| 17 |
+
0.11210401356220245,
|
| 18 |
+
0.5866137146949768,
|
| 19 |
+
0.16616441309452057,
|
| 20 |
+
-0.6815541982650757,
|
| 21 |
+
-0.029566079378128052,
|
| 22 |
+
0.9651421308517456,
|
| 23 |
+
0.16927561163902283,
|
| 24 |
+
-0.015535339713096619
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.11050384491682053,
|
| 28 |
+
0.09560801833868027,
|
| 29 |
+
0.07149424403905869,
|
| 30 |
+
0.16429583728313446,
|
| 31 |
+
0.23607666790485382,
|
| 32 |
+
0.13553044199943542,
|
| 33 |
+
0.4136315882205963,
|
| 34 |
+
0.16760702431201935,
|
| 35 |
+
0.28564009070396423,
|
| 36 |
+
0.9522888660430908,
|
| 37 |
+
0.031309906393289566,
|
| 38 |
+
0.04574710130691528,
|
| 39 |
+
0.08567056804895401,
|
| 40 |
+
0.298023521900177,
|
| 41 |
+
0.15602006018161774,
|
| 42 |
+
0.22492952644824982,
|
| 43 |
+
0.10802315920591354,
|
| 44 |
+
0.04161505028605461,
|
| 45 |
+
0.15993013978004456,
|
| 46 |
+
0.9998809099197388
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.6568294763565063,
|
| 50 |
+
0.20922525227069855,
|
| 51 |
+
0.329291433095932,
|
| 52 |
+
0.9988790154457092,
|
| 53 |
+
0.8221861720085144,
|
| 54 |
+
-0.02126980759203434,
|
| 55 |
+
0.554952085018158,
|
| 56 |
+
0.9999961256980896,
|
| 57 |
+
0.8352594971656799,
|
| 58 |
+
1.0,
|
| 59 |
+
0.3725535273551941,
|
| 60 |
+
0.20133008062839508,
|
| 61 |
+
0.2683204710483551,
|
| 62 |
+
0.9969081878662109,
|
| 63 |
+
0.5947288274765015,
|
| 64 |
+
0.135818213224411,
|
| 65 |
+
0.297533243894577,
|
| 66 |
+
0.9999833106994629,
|
| 67 |
+
0.6284497380256653,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1679762601852417,
|
| 72 |
+
-0.2037276178598404,
|
| 73 |
+
0.026118876412510872,
|
| 74 |
+
0.06734701991081238,
|
| 75 |
+
-0.3303077816963196,
|
| 76 |
+
-0.865761399269104,
|
| 77 |
+
-0.9697803854942322,
|
| 78 |
+
0.24385260045528412,
|
| 79 |
+
-0.3337814211845398,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.17690593004226685,
|
| 82 |
+
-0.019342761486768723,
|
| 83 |
+
-0.045900676399469376,
|
| 84 |
+
-0.08388058096170425,
|
| 85 |
+
-0.1825810670852661,
|
| 86 |
+
-0.9999706149101257,
|
| 87 |
+
-0.4282298684120178,
|
| 88 |
+
0.7756603956222534,
|
| 89 |
+
-0.19046637415885925,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.22041156470775605,
|
| 94 |
+
-0.17958899974822998,
|
| 95 |
+
0.04473079532384872,
|
| 96 |
+
0.2284090793132782,
|
| 97 |
+
-0.2088965356349945,
|
| 98 |
+
-0.811203727722168,
|
| 99 |
+
-0.9306126594543457,
|
| 100 |
+
0.3530711317062378,
|
| 101 |
+
-0.2207678198814392,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.2055837804079056,
|
| 104 |
+
0.005079864375293255,
|
| 105 |
+
-0.04285515695810318,
|
| 106 |
+
0.023393160849809646,
|
| 107 |
+
-0.12909780085086822,
|
| 108 |
+
-0.9969730639457702,
|
| 109 |
+
-0.31871861577033994,
|
| 110 |
+
0.8128526282310485,
|
| 111 |
+
-0.12555764615535736,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.6256548881530761,
|
| 116 |
+
0.16506216526031484,
|
| 117 |
+
0.3053938007354736,
|
| 118 |
+
0.9577780866622918,
|
| 119 |
+
0.7322160029411315,
|
| 120 |
+
-0.22335838973522296,
|
| 121 |
+
0.43161325573921194,
|
| 122 |
+
0.9983374524116516,
|
| 123 |
+
0.7683744573593138,
|
| 124 |
+
1.0,
|
| 125 |
+
0.344862767457962,
|
| 126 |
+
0.19341405749320983,
|
| 127 |
+
0.24164194464683514,
|
| 128 |
+
0.9684402346611023,
|
| 129 |
+
0.5674381494522094,
|
| 130 |
+
-0.24195577383041442,
|
| 131 |
+
0.2379095745086669,
|
| 132 |
+
0.9997554516792297,
|
| 133 |
+
0.564831252098083,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.39952757954597473,
|
| 162 |
+
-0.01743784174323082,
|
| 163 |
+
0.17103438079357147,
|
| 164 |
+
0.7105360627174377,
|
| 165 |
+
0.238600954413414,
|
| 166 |
+
-0.5759879350662231,
|
| 167 |
+
-0.11663737148046494,
|
| 168 |
+
0.815979540348053,
|
| 169 |
+
0.1842789500951767,
|
| 170 |
+
0.32329148054122925,
|
| 171 |
+
0.2642972469329834,
|
| 172 |
+
0.056828975677490234,
|
| 173 |
+
0.10836686193943024,
|
| 174 |
+
0.5435150265693665,
|
| 175 |
+
0.10053255409002304,
|
| 176 |
+
-0.7011978030204773,
|
| 177 |
+
-0.03383756801486015,
|
| 178 |
+
0.950905442237854,
|
| 179 |
+
0.0682743638753891,
|
| 180 |
+
-0.11205191910266876
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.1054687574505806,
|
| 184 |
+
0.09407484531402588,
|
| 185 |
+
0.07594858109951019,
|
| 186 |
+
0.15545178949832916,
|
| 187 |
+
0.23370550572872162,
|
| 188 |
+
0.1663108915090561,
|
| 189 |
+
0.417312353849411,
|
| 190 |
+
0.1589633673429489,
|
| 191 |
+
0.295290470123291,
|
| 192 |
+
0.8386000990867615,
|
| 193 |
+
0.03193666413426399,
|
| 194 |
+
0.03702628239989281,
|
| 195 |
+
0.08499231189489365,
|
| 196 |
+
0.33746662735939026,
|
| 197 |
+
0.13817644119262695,
|
| 198 |
+
0.2642515301704407,
|
| 199 |
+
0.13742688298225403,
|
| 200 |
+
0.10328594595193863,
|
| 201 |
+
0.24581065773963928,
|
| 202 |
+
0.988041341304779
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.6216338276863098,
|
| 206 |
+
0.1681845635175705,
|
| 207 |
+
0.3582729399204254,
|
| 208 |
+
0.9998778104782104,
|
| 209 |
+
0.7569742202758789,
|
| 210 |
+
0.29317960143089294,
|
| 211 |
+
0.5474420785903931,
|
| 212 |
+
1.0,
|
| 213 |
+
0.9644882678985596,
|
| 214 |
+
1.2399240732192993,
|
| 215 |
+
0.36810019612312317,
|
| 216 |
+
0.15229015052318573,
|
| 217 |
+
0.3755773603916168,
|
| 218 |
+
0.9999530911445618,
|
| 219 |
+
0.47173869609832764,
|
| 220 |
+
0.4396477937698364,
|
| 221 |
+
0.5856077671051025,
|
| 222 |
+
1.0,
|
| 223 |
+
0.9141661524772644,
|
| 224 |
+
1.0335123538970947
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.17779576778411865,
|
| 228 |
+
-0.2223799079656601,
|
| 229 |
+
0.009585360996425152,
|
| 230 |
+
0.27525120973587036,
|
| 231 |
+
-0.3401731848716736,
|
| 232 |
+
-0.8740139603614807,
|
| 233 |
+
-0.922980010509491,
|
| 234 |
+
0.20966650545597076,
|
| 235 |
+
-0.5117865800857544,
|
| 236 |
+
-1.04777991771698,
|
| 237 |
+
0.13721425831317902,
|
| 238 |
+
-0.11607959121465683,
|
| 239 |
+
-0.006126723252236843,
|
| 240 |
+
-0.12117788940668106,
|
| 241 |
+
-0.5865428447723389,
|
| 242 |
+
-0.9999897480010986,
|
| 243 |
+
-0.48856121301651,
|
| 244 |
+
-0.09543908387422562,
|
| 245 |
+
-0.9954046607017517,
|
| 246 |
+
-1.1056499481201172
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.22190109610557557,
|
| 250 |
+
-0.19557971894741058,
|
| 251 |
+
0.02071425139904022,
|
| 252 |
+
0.33727509021759033,
|
| 253 |
+
-0.20722176849842072,
|
| 254 |
+
-0.8324707460403442,
|
| 255 |
+
-0.8625615048408508,
|
| 256 |
+
0.48607451796531675,
|
| 257 |
+
-0.30660848736763,
|
| 258 |
+
-0.9315443730354309,
|
| 259 |
+
0.19041878879070281,
|
| 260 |
+
-0.04380948930978775,
|
| 261 |
+
-0.0050327684171497826,
|
| 262 |
+
-0.05638677150011063,
|
| 263 |
+
-0.26807846426963805,
|
| 264 |
+
-0.9989835453033448,
|
| 265 |
+
-0.329305921792984,
|
| 266 |
+
0.4013799297809601,
|
| 267 |
+
-0.8769975972175598,
|
| 268 |
+
-1.088063154220581
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.6066525983810425,
|
| 272 |
+
0.13703446269035335,
|
| 273 |
+
0.3049686551094055,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.6961594343185422,
|
| 276 |
+
0.0980641171336174,
|
| 277 |
+
0.463529108762741,
|
| 278 |
+
0.9997917461395264,
|
| 279 |
+
0.7787499904632564,
|
| 280 |
+
1.0201601552963255,
|
| 281 |
+
0.3442830562591551,
|
| 282 |
+
0.1231292974948883,
|
| 283 |
+
0.30282683849334713,
|
| 284 |
+
0.9978944087028503,
|
| 285 |
+
0.41937308669090234,
|
| 286 |
+
0.0980641171336174,
|
| 287 |
+
0.27175513744354135,
|
| 288 |
+
0.999885528087616,
|
| 289 |
+
0.5146499085426329,
|
| 290 |
+
1.0030832004547119
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 7145,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_dish_drainer_new": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.40166154503822327,
|
| 322 |
+
-0.01112140342593193,
|
| 323 |
+
0.1838103085756302,
|
| 324 |
+
0.7193062901496887,
|
| 325 |
+
0.2537333369255066,
|
| 326 |
+
-0.5631371736526489,
|
| 327 |
+
-0.14408059418201447,
|
| 328 |
+
0.8170215487480164,
|
| 329 |
+
0.1759030818939209,
|
| 330 |
+
0.3052484393119812,
|
| 331 |
+
0.27496495842933655,
|
| 332 |
+
0.07536163926124573,
|
| 333 |
+
0.11210401356220245,
|
| 334 |
+
0.5866137146949768,
|
| 335 |
+
0.16616441309452057,
|
| 336 |
+
-0.6815541982650757,
|
| 337 |
+
-0.029566079378128052,
|
| 338 |
+
0.9651421308517456,
|
| 339 |
+
0.16927561163902283,
|
| 340 |
+
-0.015535339713096619
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.11050384491682053,
|
| 344 |
+
0.09560801833868027,
|
| 345 |
+
0.07149424403905869,
|
| 346 |
+
0.16429583728313446,
|
| 347 |
+
0.23607666790485382,
|
| 348 |
+
0.13553044199943542,
|
| 349 |
+
0.4136315882205963,
|
| 350 |
+
0.16760702431201935,
|
| 351 |
+
0.28564009070396423,
|
| 352 |
+
0.9522888660430908,
|
| 353 |
+
0.031309906393289566,
|
| 354 |
+
0.04574710130691528,
|
| 355 |
+
0.08567056804895401,
|
| 356 |
+
0.298023521900177,
|
| 357 |
+
0.15602006018161774,
|
| 358 |
+
0.22492952644824982,
|
| 359 |
+
0.10802315920591354,
|
| 360 |
+
0.04161505028605461,
|
| 361 |
+
0.15993013978004456,
|
| 362 |
+
0.9998809099197388
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.6568294763565063,
|
| 366 |
+
0.20922525227069855,
|
| 367 |
+
0.329291433095932,
|
| 368 |
+
0.9988790154457092,
|
| 369 |
+
0.8221861720085144,
|
| 370 |
+
-0.02126980759203434,
|
| 371 |
+
0.554952085018158,
|
| 372 |
+
0.9999961256980896,
|
| 373 |
+
0.8352594971656799,
|
| 374 |
+
1.0,
|
| 375 |
+
0.3725535273551941,
|
| 376 |
+
0.20133008062839508,
|
| 377 |
+
0.2683204710483551,
|
| 378 |
+
0.9969081878662109,
|
| 379 |
+
0.5947288274765015,
|
| 380 |
+
0.135818213224411,
|
| 381 |
+
0.297533243894577,
|
| 382 |
+
0.9999833106994629,
|
| 383 |
+
0.6284497380256653,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1679762601852417,
|
| 388 |
+
-0.2037276178598404,
|
| 389 |
+
0.026118876412510872,
|
| 390 |
+
0.06734701991081238,
|
| 391 |
+
-0.3303077816963196,
|
| 392 |
+
-0.865761399269104,
|
| 393 |
+
-0.9697803854942322,
|
| 394 |
+
0.24385260045528412,
|
| 395 |
+
-0.3337814211845398,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.17690593004226685,
|
| 398 |
+
-0.019342761486768723,
|
| 399 |
+
-0.045900676399469376,
|
| 400 |
+
-0.08388058096170425,
|
| 401 |
+
-0.1825810670852661,
|
| 402 |
+
-0.9999706149101257,
|
| 403 |
+
-0.4282298684120178,
|
| 404 |
+
0.7756603956222534,
|
| 405 |
+
-0.19046637415885925,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.22041156470775605,
|
| 410 |
+
-0.17958899974822998,
|
| 411 |
+
0.04473079532384872,
|
| 412 |
+
0.2284090793132782,
|
| 413 |
+
-0.2088965356349945,
|
| 414 |
+
-0.811203727722168,
|
| 415 |
+
-0.9306126594543457,
|
| 416 |
+
0.3530711317062378,
|
| 417 |
+
-0.2207678198814392,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.2055837804079056,
|
| 420 |
+
0.005079864375293255,
|
| 421 |
+
-0.04285515695810318,
|
| 422 |
+
0.023393160849809646,
|
| 423 |
+
-0.12909780085086822,
|
| 424 |
+
-0.9969730639457702,
|
| 425 |
+
-0.31871861577033994,
|
| 426 |
+
0.8128526282310485,
|
| 427 |
+
-0.12555764615535736,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.6256548881530761,
|
| 432 |
+
0.16506216526031484,
|
| 433 |
+
0.3053938007354736,
|
| 434 |
+
0.9577780866622918,
|
| 435 |
+
0.7322160029411315,
|
| 436 |
+
-0.22335838973522296,
|
| 437 |
+
0.43161325573921194,
|
| 438 |
+
0.9983374524116516,
|
| 439 |
+
0.7683744573593138,
|
| 440 |
+
1.0,
|
| 441 |
+
0.344862767457962,
|
| 442 |
+
0.19341405749320983,
|
| 443 |
+
0.24164194464683514,
|
| 444 |
+
0.9684402346611023,
|
| 445 |
+
0.5674381494522094,
|
| 446 |
+
-0.24195577383041442,
|
| 447 |
+
0.2379095745086669,
|
| 448 |
+
0.9997554516792297,
|
| 449 |
+
0.564831252098083,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.39952757954597473,
|
| 478 |
+
-0.01743784174323082,
|
| 479 |
+
0.17103438079357147,
|
| 480 |
+
0.7105360627174377,
|
| 481 |
+
0.238600954413414,
|
| 482 |
+
-0.5759879350662231,
|
| 483 |
+
-0.11663737148046494,
|
| 484 |
+
0.815979540348053,
|
| 485 |
+
0.1842789500951767,
|
| 486 |
+
0.32329148054122925,
|
| 487 |
+
0.2642972469329834,
|
| 488 |
+
0.056828975677490234,
|
| 489 |
+
0.10836686193943024,
|
| 490 |
+
0.5435150265693665,
|
| 491 |
+
0.10053255409002304,
|
| 492 |
+
-0.7011978030204773,
|
| 493 |
+
-0.03383756801486015,
|
| 494 |
+
0.950905442237854,
|
| 495 |
+
0.0682743638753891,
|
| 496 |
+
-0.11205191910266876
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.1054687574505806,
|
| 500 |
+
0.09407484531402588,
|
| 501 |
+
0.07594858109951019,
|
| 502 |
+
0.15545178949832916,
|
| 503 |
+
0.23370550572872162,
|
| 504 |
+
0.1663108915090561,
|
| 505 |
+
0.417312353849411,
|
| 506 |
+
0.1589633673429489,
|
| 507 |
+
0.295290470123291,
|
| 508 |
+
0.8386000990867615,
|
| 509 |
+
0.03193666413426399,
|
| 510 |
+
0.03702628239989281,
|
| 511 |
+
0.08499231189489365,
|
| 512 |
+
0.33746662735939026,
|
| 513 |
+
0.13817644119262695,
|
| 514 |
+
0.2642515301704407,
|
| 515 |
+
0.13742688298225403,
|
| 516 |
+
0.10328594595193863,
|
| 517 |
+
0.24581065773963928,
|
| 518 |
+
0.988041341304779
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.6216338276863098,
|
| 522 |
+
0.1681845635175705,
|
| 523 |
+
0.3582729399204254,
|
| 524 |
+
0.9998778104782104,
|
| 525 |
+
0.7569742202758789,
|
| 526 |
+
0.29317960143089294,
|
| 527 |
+
0.5474420785903931,
|
| 528 |
+
1.0,
|
| 529 |
+
0.9644882678985596,
|
| 530 |
+
1.2399240732192993,
|
| 531 |
+
0.36810019612312317,
|
| 532 |
+
0.15229015052318573,
|
| 533 |
+
0.3755773603916168,
|
| 534 |
+
0.9999530911445618,
|
| 535 |
+
0.47173869609832764,
|
| 536 |
+
0.4396477937698364,
|
| 537 |
+
0.5856077671051025,
|
| 538 |
+
1.0,
|
| 539 |
+
0.9141661524772644,
|
| 540 |
+
1.0335123538970947
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.17779576778411865,
|
| 544 |
+
-0.2223799079656601,
|
| 545 |
+
0.009585360996425152,
|
| 546 |
+
0.27525120973587036,
|
| 547 |
+
-0.3401731848716736,
|
| 548 |
+
-0.8740139603614807,
|
| 549 |
+
-0.922980010509491,
|
| 550 |
+
0.20966650545597076,
|
| 551 |
+
-0.5117865800857544,
|
| 552 |
+
-1.04777991771698,
|
| 553 |
+
0.13721425831317902,
|
| 554 |
+
-0.11607959121465683,
|
| 555 |
+
-0.006126723252236843,
|
| 556 |
+
-0.12117788940668106,
|
| 557 |
+
-0.5865428447723389,
|
| 558 |
+
-0.9999897480010986,
|
| 559 |
+
-0.48856121301651,
|
| 560 |
+
-0.09543908387422562,
|
| 561 |
+
-0.9954046607017517,
|
| 562 |
+
-1.1056499481201172
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.22190109610557557,
|
| 566 |
+
-0.19557971894741058,
|
| 567 |
+
0.02071425139904022,
|
| 568 |
+
0.33727509021759033,
|
| 569 |
+
-0.20722176849842072,
|
| 570 |
+
-0.8324707460403442,
|
| 571 |
+
-0.8625615048408508,
|
| 572 |
+
0.48607451796531675,
|
| 573 |
+
-0.30660848736763,
|
| 574 |
+
-0.9315443730354309,
|
| 575 |
+
0.19041878879070281,
|
| 576 |
+
-0.04380948930978775,
|
| 577 |
+
-0.0050327684171497826,
|
| 578 |
+
-0.05638677150011063,
|
| 579 |
+
-0.26807846426963805,
|
| 580 |
+
-0.9989835453033448,
|
| 581 |
+
-0.329305921792984,
|
| 582 |
+
0.4013799297809601,
|
| 583 |
+
-0.8769975972175598,
|
| 584 |
+
-1.088063154220581
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.6066525983810425,
|
| 588 |
+
0.13703446269035335,
|
| 589 |
+
0.3049686551094055,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.6961594343185422,
|
| 592 |
+
0.0980641171336174,
|
| 593 |
+
0.463529108762741,
|
| 594 |
+
0.9997917461395264,
|
| 595 |
+
0.7787499904632564,
|
| 596 |
+
1.0201601552963255,
|
| 597 |
+
0.3442830562591551,
|
| 598 |
+
0.1231292974948883,
|
| 599 |
+
0.30282683849334713,
|
| 600 |
+
0.9978944087028503,
|
| 601 |
+
0.41937308669090234,
|
| 602 |
+
0.0980641171336174,
|
| 603 |
+
0.27175513744354135,
|
| 604 |
+
0.999885528087616,
|
| 605 |
+
0.5146499085426329,
|
| 606 |
+
1.0030832004547119
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 7145,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcbde742d8d0fe9747cabd988a524d92c8d8512cc8c5f28e2853157bf95c38da
|
| 3 |
+
size 2889536104
|
1e-4/twinvla-scratch-1e-4-aloha_dish_drainer/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/tabletop-v3/twinvla-scratch-1e-4-aloha_dish_drainer/singlevla_config",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_handover_box/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_handover_box/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_handover_box_new": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.322445809841156,
|
| 6 |
+
-0.0713680312037468,
|
| 7 |
+
0.16350828111171722,
|
| 8 |
+
0.615584671497345,
|
| 9 |
+
0.02582639269530773,
|
| 10 |
+
-0.6291787028312683,
|
| 11 |
+
-0.12873496115207672,
|
| 12 |
+
0.9684517979621887,
|
| 13 |
+
-0.05324753001332283,
|
| 14 |
+
0.32048356533050537,
|
| 15 |
+
0.35535329580307007,
|
| 16 |
+
-0.017269128933548927,
|
| 17 |
+
0.25354719161987305,
|
| 18 |
+
0.946760356426239,
|
| 19 |
+
-0.1093481183052063,
|
| 20 |
+
-0.16364224255084991,
|
| 21 |
+
0.10808137059211731,
|
| 22 |
+
0.9652293920516968,
|
| 23 |
+
-0.08225571364164352,
|
| 24 |
+
0.6809535622596741
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.07454725354909897,
|
| 28 |
+
0.08869025856256485,
|
| 29 |
+
0.07996608316898346,
|
| 30 |
+
0.3346059024333954,
|
| 31 |
+
0.19985823333263397,
|
| 32 |
+
0.2694716155529022,
|
| 33 |
+
0.12514568865299225,
|
| 34 |
+
0.030899852514266968,
|
| 35 |
+
0.16146361827850342,
|
| 36 |
+
0.9473041296005249,
|
| 37 |
+
0.06487792730331421,
|
| 38 |
+
0.0389498695731163,
|
| 39 |
+
0.027652494609355927,
|
| 40 |
+
0.10490526258945465,
|
| 41 |
+
0.18384318053722382,
|
| 42 |
+
0.1417805403470993,
|
| 43 |
+
0.20332522690296173,
|
| 44 |
+
0.06656655669212341,
|
| 45 |
+
0.06421920657157898,
|
| 46 |
+
0.7322932481765747
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.48683926463127136,
|
| 50 |
+
0.0484432689845562,
|
| 51 |
+
0.31490612030029297,
|
| 52 |
+
0.99891197681427,
|
| 53 |
+
0.4277522563934326,
|
| 54 |
+
0.06322141736745834,
|
| 55 |
+
0.4004654884338379,
|
| 56 |
+
0.9999857544898987,
|
| 57 |
+
0.3100079298019409,
|
| 58 |
+
1.0,
|
| 59 |
+
0.5334027409553528,
|
| 60 |
+
0.08494444936513901,
|
| 61 |
+
0.36568865180015564,
|
| 62 |
+
0.9999882578849792,
|
| 63 |
+
0.2546274662017822,
|
| 64 |
+
0.1172015443444252,
|
| 65 |
+
0.7982608079910278,
|
| 66 |
+
0.9999992251396179,
|
| 67 |
+
0.20094169676303864,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1422317922115326,
|
| 72 |
+
-0.2763901352882385,
|
| 73 |
+
-0.0600760243833065,
|
| 74 |
+
-0.14848311245441437,
|
| 75 |
+
-0.6282482743263245,
|
| 76 |
+
-0.9999129176139832,
|
| 77 |
+
-0.42181891202926636,
|
| 78 |
+
0.7404066324234009,
|
| 79 |
+
-0.6676974296569824,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.1786160171031952,
|
| 82 |
+
-0.1845615804195404,
|
| 83 |
+
0.1687021553516388,
|
| 84 |
+
0.2762398421764374,
|
| 85 |
+
-0.7479667067527771,
|
| 86 |
+
-0.8485982418060303,
|
| 87 |
+
-0.2597721517086029,
|
| 88 |
+
0.6015138626098633,
|
| 89 |
+
-0.3933228552341461,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.1950138956308365,
|
| 94 |
+
-0.24691226959228516,
|
| 95 |
+
-0.015285035967826844,
|
| 96 |
+
-0.04555398792028427,
|
| 97 |
+
-0.4452396559715271,
|
| 98 |
+
-0.996303243637085,
|
| 99 |
+
-0.3760478734970093,
|
| 100 |
+
0.8516808867454528,
|
| 101 |
+
-0.46342918753623963,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.21926841557025908,
|
| 104 |
+
-0.1317625629901886,
|
| 105 |
+
0.1978745412826538,
|
| 106 |
+
0.5117229986190795,
|
| 107 |
+
-0.6376786828041077,
|
| 108 |
+
-0.6609986042976379,
|
| 109 |
+
-0.19099083304405212,
|
| 110 |
+
0.6930621123313904,
|
| 111 |
+
-0.2356126993894577,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.47150796771049497,
|
| 116 |
+
0.038070930540561675,
|
| 117 |
+
0.28182336688041676,
|
| 118 |
+
0.9817836880683899,
|
| 119 |
+
0.3871919810771942,
|
| 120 |
+
-0.1345064049959186,
|
| 121 |
+
0.20285944879054985,
|
| 122 |
+
0.9992118668556214,
|
| 123 |
+
0.2293877118825912,
|
| 124 |
+
1.0,
|
| 125 |
+
0.49810330152511595,
|
| 126 |
+
0.0599309906363487,
|
| 127 |
+
0.3309180569648742,
|
| 128 |
+
0.9995350050926208,
|
| 129 |
+
0.1829529863595952,
|
| 130 |
+
0.03216676786541939,
|
| 131 |
+
0.7132800936698909,
|
| 132 |
+
0.9997488117218017,
|
| 133 |
+
0.08941484957933345,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.3200362026691437,
|
| 162 |
+
-0.06315236538648605,
|
| 163 |
+
0.15397155284881592,
|
| 164 |
+
0.6064433455467224,
|
| 165 |
+
0.06654603034257889,
|
| 166 |
+
-0.640325129032135,
|
| 167 |
+
-0.09359700232744217,
|
| 168 |
+
0.9627139568328857,
|
| 169 |
+
0.03187317028641701,
|
| 170 |
+
0.6514412760734558,
|
| 171 |
+
0.35154762864112854,
|
| 172 |
+
-0.018256496638059616,
|
| 173 |
+
0.2389756739139557,
|
| 174 |
+
0.9405066967010498,
|
| 175 |
+
-0.11245886981487274,
|
| 176 |
+
-0.2019510418176651,
|
| 177 |
+
0.10934194922447205,
|
| 178 |
+
0.9565088152885437,
|
| 179 |
+
-0.08098198473453522,
|
| 180 |
+
0.8085420727729797
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.07218372821807861,
|
| 184 |
+
0.07983937114477158,
|
| 185 |
+
0.08212247490882874,
|
| 186 |
+
0.32560110092163086,
|
| 187 |
+
0.1554078459739685,
|
| 188 |
+
0.29600194096565247,
|
| 189 |
+
0.147041916847229,
|
| 190 |
+
0.06898749619722366,
|
| 191 |
+
0.1924201399087906,
|
| 192 |
+
0.47606807947158813,
|
| 193 |
+
0.06400062888860703,
|
| 194 |
+
0.03751807287335396,
|
| 195 |
+
0.032336752861738205,
|
| 196 |
+
0.10509955137968063,
|
| 197 |
+
0.1779795140028,
|
| 198 |
+
0.13889142870903015,
|
| 199 |
+
0.20048992335796356,
|
| 200 |
+
0.082735575735569,
|
| 201 |
+
0.13973525166511536,
|
| 202 |
+
0.40020087361335754
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.47570154070854187,
|
| 206 |
+
0.08932404220104218,
|
| 207 |
+
0.44513142108917236,
|
| 208 |
+
0.9999915361404419,
|
| 209 |
+
0.6316148042678833,
|
| 210 |
+
0.7311769127845764,
|
| 211 |
+
0.5646719932556152,
|
| 212 |
+
1.0,
|
| 213 |
+
0.9345466494560242,
|
| 214 |
+
1.3299691677093506,
|
| 215 |
+
0.5250220894813538,
|
| 216 |
+
0.07912999391555786,
|
| 217 |
+
0.41775044798851013,
|
| 218 |
+
0.9999979138374329,
|
| 219 |
+
0.2288104146718979,
|
| 220 |
+
0.2556033134460449,
|
| 221 |
+
0.7930954098701477,
|
| 222 |
+
1.0,
|
| 223 |
+
0.8460071086883545,
|
| 224 |
+
1.1448447704315186
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.15507374703884125,
|
| 228 |
+
-0.24968452751636505,
|
| 229 |
+
-0.005626574158668518,
|
| 230 |
+
-0.12249666452407837,
|
| 231 |
+
-0.3874700665473938,
|
| 232 |
+
-1.0,
|
| 233 |
+
-0.8481224179267883,
|
| 234 |
+
0.28493279218673706,
|
| 235 |
+
-0.8170893788337708,
|
| 236 |
+
-1.083611011505127,
|
| 237 |
+
0.18484443426132202,
|
| 238 |
+
-0.1679670214653015,
|
| 239 |
+
0.1543029397726059,
|
| 240 |
+
0.2590605616569519,
|
| 241 |
+
-0.7203781604766846,
|
| 242 |
+
-0.8606433272361755,
|
| 243 |
+
-0.2443554699420929,
|
| 244 |
+
0.2216777801513672,
|
| 245 |
+
-0.9731146693229675,
|
| 246 |
+
-1.0848060846328735
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.1903739631175995,
|
| 250 |
+
-0.22257488489151,
|
| 251 |
+
-0.0036250025033950804,
|
| 252 |
+
-0.015333320312201977,
|
| 253 |
+
-0.2553225290775299,
|
| 254 |
+
-0.9997995805740356,
|
| 255 |
+
-0.3545967137813568,
|
| 256 |
+
0.6295642066001892,
|
| 257 |
+
-0.32733017563819883,
|
| 258 |
+
-0.4065189242362976,
|
| 259 |
+
0.22028838396072387,
|
| 260 |
+
-0.1278022611141205,
|
| 261 |
+
0.17875114858150482,
|
| 262 |
+
0.488557243347168,
|
| 263 |
+
-0.6262442255020142,
|
| 264 |
+
-0.6858670902252197,
|
| 265 |
+
-0.17815817892551422,
|
| 266 |
+
0.6348884439468384,
|
| 267 |
+
-0.5856496715545654,
|
| 268 |
+
-0.4086606001853943
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.4643457818031311,
|
| 272 |
+
0.05302721098065367,
|
| 273 |
+
0.32663319587707507,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.426870135068893,
|
| 276 |
+
0.18705489814281454,
|
| 277 |
+
0.3631119978427884,
|
| 278 |
+
0.9999364447593689,
|
| 279 |
+
0.7475578069686883,
|
| 280 |
+
1.178509011268615,
|
| 281 |
+
0.4939642870426177,
|
| 282 |
+
0.051381030380725806,
|
| 283 |
+
0.3385275864601135,
|
| 284 |
+
0.999157931804657,
|
| 285 |
+
0.16684140086173982,
|
| 286 |
+
0.05098062053322772,
|
| 287 |
+
0.7065742087364195,
|
| 288 |
+
0.9998370099067688,
|
| 289 |
+
0.5137611627578699,
|
| 290 |
+
1.0447997903823851
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 11829,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_handover_box": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.322445809841156,
|
| 322 |
+
-0.0713680312037468,
|
| 323 |
+
0.16350828111171722,
|
| 324 |
+
0.615584671497345,
|
| 325 |
+
0.02582639269530773,
|
| 326 |
+
-0.6291787028312683,
|
| 327 |
+
-0.12873496115207672,
|
| 328 |
+
0.9684517979621887,
|
| 329 |
+
-0.05324753001332283,
|
| 330 |
+
0.32048356533050537,
|
| 331 |
+
0.35535329580307007,
|
| 332 |
+
-0.017269128933548927,
|
| 333 |
+
0.25354719161987305,
|
| 334 |
+
0.946760356426239,
|
| 335 |
+
-0.1093481183052063,
|
| 336 |
+
-0.16364224255084991,
|
| 337 |
+
0.10808137059211731,
|
| 338 |
+
0.9652293920516968,
|
| 339 |
+
-0.08225571364164352,
|
| 340 |
+
0.6809535622596741
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.07454725354909897,
|
| 344 |
+
0.08869025856256485,
|
| 345 |
+
0.07996608316898346,
|
| 346 |
+
0.3346059024333954,
|
| 347 |
+
0.19985823333263397,
|
| 348 |
+
0.2694716155529022,
|
| 349 |
+
0.12514568865299225,
|
| 350 |
+
0.030899852514266968,
|
| 351 |
+
0.16146361827850342,
|
| 352 |
+
0.9473041296005249,
|
| 353 |
+
0.06487792730331421,
|
| 354 |
+
0.0389498695731163,
|
| 355 |
+
0.027652494609355927,
|
| 356 |
+
0.10490526258945465,
|
| 357 |
+
0.18384318053722382,
|
| 358 |
+
0.1417805403470993,
|
| 359 |
+
0.20332522690296173,
|
| 360 |
+
0.06656655669212341,
|
| 361 |
+
0.06421920657157898,
|
| 362 |
+
0.7322932481765747
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.48683926463127136,
|
| 366 |
+
0.0484432689845562,
|
| 367 |
+
0.31490612030029297,
|
| 368 |
+
0.99891197681427,
|
| 369 |
+
0.4277522563934326,
|
| 370 |
+
0.06322141736745834,
|
| 371 |
+
0.4004654884338379,
|
| 372 |
+
0.9999857544898987,
|
| 373 |
+
0.3100079298019409,
|
| 374 |
+
1.0,
|
| 375 |
+
0.5334027409553528,
|
| 376 |
+
0.08494444936513901,
|
| 377 |
+
0.36568865180015564,
|
| 378 |
+
0.9999882578849792,
|
| 379 |
+
0.2546274662017822,
|
| 380 |
+
0.1172015443444252,
|
| 381 |
+
0.7982608079910278,
|
| 382 |
+
0.9999992251396179,
|
| 383 |
+
0.20094169676303864,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1422317922115326,
|
| 388 |
+
-0.2763901352882385,
|
| 389 |
+
-0.0600760243833065,
|
| 390 |
+
-0.14848311245441437,
|
| 391 |
+
-0.6282482743263245,
|
| 392 |
+
-0.9999129176139832,
|
| 393 |
+
-0.42181891202926636,
|
| 394 |
+
0.7404066324234009,
|
| 395 |
+
-0.6676974296569824,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.1786160171031952,
|
| 398 |
+
-0.1845615804195404,
|
| 399 |
+
0.1687021553516388,
|
| 400 |
+
0.2762398421764374,
|
| 401 |
+
-0.7479667067527771,
|
| 402 |
+
-0.8485982418060303,
|
| 403 |
+
-0.2597721517086029,
|
| 404 |
+
0.6015138626098633,
|
| 405 |
+
-0.3933228552341461,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.1950138956308365,
|
| 410 |
+
-0.24691226959228516,
|
| 411 |
+
-0.015285035967826844,
|
| 412 |
+
-0.04555398792028427,
|
| 413 |
+
-0.4452396559715271,
|
| 414 |
+
-0.996303243637085,
|
| 415 |
+
-0.3760478734970093,
|
| 416 |
+
0.8516808867454528,
|
| 417 |
+
-0.46342918753623963,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.21926841557025908,
|
| 420 |
+
-0.1317625629901886,
|
| 421 |
+
0.1978745412826538,
|
| 422 |
+
0.5117229986190795,
|
| 423 |
+
-0.6376786828041077,
|
| 424 |
+
-0.6609986042976379,
|
| 425 |
+
-0.19099083304405212,
|
| 426 |
+
0.6930621123313904,
|
| 427 |
+
-0.2356126993894577,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.47150796771049497,
|
| 432 |
+
0.038070930540561675,
|
| 433 |
+
0.28182336688041676,
|
| 434 |
+
0.9817836880683899,
|
| 435 |
+
0.3871919810771942,
|
| 436 |
+
-0.1345064049959186,
|
| 437 |
+
0.20285944879054985,
|
| 438 |
+
0.9992118668556214,
|
| 439 |
+
0.2293877118825912,
|
| 440 |
+
1.0,
|
| 441 |
+
0.49810330152511595,
|
| 442 |
+
0.0599309906363487,
|
| 443 |
+
0.3309180569648742,
|
| 444 |
+
0.9995350050926208,
|
| 445 |
+
0.1829529863595952,
|
| 446 |
+
0.03216676786541939,
|
| 447 |
+
0.7132800936698909,
|
| 448 |
+
0.9997488117218017,
|
| 449 |
+
0.08941484957933345,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.3200362026691437,
|
| 478 |
+
-0.06315236538648605,
|
| 479 |
+
0.15397155284881592,
|
| 480 |
+
0.6064433455467224,
|
| 481 |
+
0.06654603034257889,
|
| 482 |
+
-0.640325129032135,
|
| 483 |
+
-0.09359700232744217,
|
| 484 |
+
0.9627139568328857,
|
| 485 |
+
0.03187317028641701,
|
| 486 |
+
0.6514412760734558,
|
| 487 |
+
0.35154762864112854,
|
| 488 |
+
-0.018256496638059616,
|
| 489 |
+
0.2389756739139557,
|
| 490 |
+
0.9405066967010498,
|
| 491 |
+
-0.11245886981487274,
|
| 492 |
+
-0.2019510418176651,
|
| 493 |
+
0.10934194922447205,
|
| 494 |
+
0.9565088152885437,
|
| 495 |
+
-0.08098198473453522,
|
| 496 |
+
0.8085420727729797
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.07218372821807861,
|
| 500 |
+
0.07983937114477158,
|
| 501 |
+
0.08212247490882874,
|
| 502 |
+
0.32560110092163086,
|
| 503 |
+
0.1554078459739685,
|
| 504 |
+
0.29600194096565247,
|
| 505 |
+
0.147041916847229,
|
| 506 |
+
0.06898749619722366,
|
| 507 |
+
0.1924201399087906,
|
| 508 |
+
0.47606807947158813,
|
| 509 |
+
0.06400062888860703,
|
| 510 |
+
0.03751807287335396,
|
| 511 |
+
0.032336752861738205,
|
| 512 |
+
0.10509955137968063,
|
| 513 |
+
0.1779795140028,
|
| 514 |
+
0.13889142870903015,
|
| 515 |
+
0.20048992335796356,
|
| 516 |
+
0.082735575735569,
|
| 517 |
+
0.13973525166511536,
|
| 518 |
+
0.40020087361335754
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.47570154070854187,
|
| 522 |
+
0.08932404220104218,
|
| 523 |
+
0.44513142108917236,
|
| 524 |
+
0.9999915361404419,
|
| 525 |
+
0.6316148042678833,
|
| 526 |
+
0.7311769127845764,
|
| 527 |
+
0.5646719932556152,
|
| 528 |
+
1.0,
|
| 529 |
+
0.9345466494560242,
|
| 530 |
+
1.3299691677093506,
|
| 531 |
+
0.5250220894813538,
|
| 532 |
+
0.07912999391555786,
|
| 533 |
+
0.41775044798851013,
|
| 534 |
+
0.9999979138374329,
|
| 535 |
+
0.2288104146718979,
|
| 536 |
+
0.2556033134460449,
|
| 537 |
+
0.7930954098701477,
|
| 538 |
+
1.0,
|
| 539 |
+
0.8460071086883545,
|
| 540 |
+
1.1448447704315186
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.15507374703884125,
|
| 544 |
+
-0.24968452751636505,
|
| 545 |
+
-0.005626574158668518,
|
| 546 |
+
-0.12249666452407837,
|
| 547 |
+
-0.3874700665473938,
|
| 548 |
+
-1.0,
|
| 549 |
+
-0.8481224179267883,
|
| 550 |
+
0.28493279218673706,
|
| 551 |
+
-0.8170893788337708,
|
| 552 |
+
-1.083611011505127,
|
| 553 |
+
0.18484443426132202,
|
| 554 |
+
-0.1679670214653015,
|
| 555 |
+
0.1543029397726059,
|
| 556 |
+
0.2590605616569519,
|
| 557 |
+
-0.7203781604766846,
|
| 558 |
+
-0.8606433272361755,
|
| 559 |
+
-0.2443554699420929,
|
| 560 |
+
0.2216777801513672,
|
| 561 |
+
-0.9731146693229675,
|
| 562 |
+
-1.0848060846328735
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.1903739631175995,
|
| 566 |
+
-0.22257488489151,
|
| 567 |
+
-0.0036250025033950804,
|
| 568 |
+
-0.015333320312201977,
|
| 569 |
+
-0.2553225290775299,
|
| 570 |
+
-0.9997995805740356,
|
| 571 |
+
-0.3545967137813568,
|
| 572 |
+
0.6295642066001892,
|
| 573 |
+
-0.32733017563819883,
|
| 574 |
+
-0.4065189242362976,
|
| 575 |
+
0.22028838396072387,
|
| 576 |
+
-0.1278022611141205,
|
| 577 |
+
0.17875114858150482,
|
| 578 |
+
0.488557243347168,
|
| 579 |
+
-0.6262442255020142,
|
| 580 |
+
-0.6858670902252197,
|
| 581 |
+
-0.17815817892551422,
|
| 582 |
+
0.6348884439468384,
|
| 583 |
+
-0.5856496715545654,
|
| 584 |
+
-0.4086606001853943
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.4643457818031311,
|
| 588 |
+
0.05302721098065367,
|
| 589 |
+
0.32663319587707507,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.426870135068893,
|
| 592 |
+
0.18705489814281454,
|
| 593 |
+
0.3631119978427884,
|
| 594 |
+
0.9999364447593689,
|
| 595 |
+
0.7475578069686883,
|
| 596 |
+
1.178509011268615,
|
| 597 |
+
0.4939642870426177,
|
| 598 |
+
0.051381030380725806,
|
| 599 |
+
0.3385275864601135,
|
| 600 |
+
0.999157931804657,
|
| 601 |
+
0.16684140086173982,
|
| 602 |
+
0.05098062053322772,
|
| 603 |
+
0.7065742087364195,
|
| 604 |
+
0.9998370099067688,
|
| 605 |
+
0.5137611627578699,
|
| 606 |
+
1.0447997903823851
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 11829,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_handover_box/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ef8ab0367cab07a1e4d78f1285753400e6b24622a602d21ecb5a635be5d8704
|
| 3 |
+
size 2889536104
|
1e-4/twinvla-scratch-1e-4-aloha_handover_box/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_lift_box/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_lift_box/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_lift_box_new": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.3633340299129486,
|
| 6 |
+
-0.0188243817538023,
|
| 7 |
+
0.1790345162153244,
|
| 8 |
+
0.4083189070224762,
|
| 9 |
+
-0.11689117550849915,
|
| 10 |
+
-0.8073354959487915,
|
| 11 |
+
-0.10573232173919678,
|
| 12 |
+
0.9415333271026611,
|
| 13 |
+
-0.16247375309467316,
|
| 14 |
+
0.619253396987915,
|
| 15 |
+
0.36085841059684753,
|
| 16 |
+
0.013982076197862625,
|
| 17 |
+
0.20412708818912506,
|
| 18 |
+
0.5001599788665771,
|
| 19 |
+
0.11137505620718002,
|
| 20 |
+
-0.7415233254432678,
|
| 21 |
+
0.05212549492716789,
|
| 22 |
+
0.9483596682548523,
|
| 23 |
+
0.16254937648773193,
|
| 24 |
+
0.7590736150741577
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.0638059601187706,
|
| 28 |
+
0.06317954510450363,
|
| 29 |
+
0.11073724925518036,
|
| 30 |
+
0.31736424565315247,
|
| 31 |
+
0.13928908109664917,
|
| 32 |
+
0.21841377019882202,
|
| 33 |
+
0.22394296526908875,
|
| 34 |
+
0.08008279651403427,
|
| 35 |
+
0.1392110288143158,
|
| 36 |
+
0.7852216362953186,
|
| 37 |
+
0.056878820061683655,
|
| 38 |
+
0.059404969215393066,
|
| 39 |
+
0.1170634776353836,
|
| 40 |
+
0.3238432705402374,
|
| 41 |
+
0.14080214500427246,
|
| 42 |
+
0.25074857473373413,
|
| 43 |
+
0.2164432257413864,
|
| 44 |
+
0.07544830441474915,
|
| 45 |
+
0.1375824362039566,
|
| 46 |
+
0.651036262512207
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.5681452751159668,
|
| 50 |
+
0.2437673658132553,
|
| 51 |
+
0.45541316270828247,
|
| 52 |
+
0.9999293088912964,
|
| 53 |
+
0.523757815361023,
|
| 54 |
+
0.4592168927192688,
|
| 55 |
+
0.7756927013397217,
|
| 56 |
+
0.9999935030937195,
|
| 57 |
+
0.2805824279785156,
|
| 58 |
+
1.0,
|
| 59 |
+
0.5600330233573914,
|
| 60 |
+
0.3342031240463257,
|
| 61 |
+
0.4682213366031647,
|
| 62 |
+
0.9998393058776855,
|
| 63 |
+
0.7949740886688232,
|
| 64 |
+
0.1664249449968338,
|
| 65 |
+
0.9131186604499817,
|
| 66 |
+
0.9999967813491821,
|
| 67 |
+
0.7936055064201355,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1517709195613861,
|
| 72 |
+
-0.2900286316871643,
|
| 73 |
+
-0.07412093877792358,
|
| 74 |
+
-0.4022133946418762,
|
| 75 |
+
-0.7361933588981628,
|
| 76 |
+
-0.9999988079071045,
|
| 77 |
+
-0.9935019016265869,
|
| 78 |
+
0.10709662735462189,
|
| 79 |
+
-0.8023554682731628,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.15366072952747345,
|
| 82 |
+
-0.23686714470386505,
|
| 83 |
+
0.0008372184820473194,
|
| 84 |
+
-0.5509981513023376,
|
| 85 |
+
-0.35234102606773376,
|
| 86 |
+
-0.999956488609314,
|
| 87 |
+
-0.5318384766578674,
|
| 88 |
+
0.3388061225414276,
|
| 89 |
+
-0.27330997586250305,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.21054682105779648,
|
| 94 |
+
-0.1866426882147789,
|
| 95 |
+
0.008138886513188495,
|
| 96 |
+
-0.19710821226239203,
|
| 97 |
+
-0.5368945515155792,
|
| 98 |
+
-0.9981186389923096,
|
| 99 |
+
-0.6956261324882507,
|
| 100 |
+
0.6267582887411117,
|
| 101 |
+
-0.5600040704011917,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.2190245844423771,
|
| 104 |
+
-0.15968348175287247,
|
| 105 |
+
0.025033411756157874,
|
| 106 |
+
-0.23832830414175987,
|
| 107 |
+
-0.2097599548101425,
|
| 108 |
+
-0.9988620406389237,
|
| 109 |
+
-0.4039672353863716,
|
| 110 |
+
0.6080100274085999,
|
| 111 |
+
-0.19206354618072508,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.5033414244651794,
|
| 116 |
+
0.16928535521030416,
|
| 117 |
+
0.41566276580095285,
|
| 118 |
+
0.9899059218168258,
|
| 119 |
+
0.15462822496891018,
|
| 120 |
+
0.03764873944222882,
|
| 121 |
+
0.4657947558164549,
|
| 122 |
+
0.9995575082302094,
|
| 123 |
+
0.12326683558523567,
|
| 124 |
+
1.0,
|
| 125 |
+
0.4801343524456022,
|
| 126 |
+
0.1795493066310881,
|
| 127 |
+
0.4235989159345625,
|
| 128 |
+
0.9913575077056883,
|
| 129 |
+
0.5356137681007356,
|
| 130 |
+
0.044951977618036626,
|
| 131 |
+
0.7084567189216593,
|
| 132 |
+
0.9992782145738601,
|
| 133 |
+
0.6150667482614517,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.3302275836467743,
|
| 162 |
+
-0.022152910009026527,
|
| 163 |
+
0.15129828453063965,
|
| 164 |
+
0.2713927626609802,
|
| 165 |
+
-0.12320414930582047,
|
| 166 |
+
-0.8610084652900696,
|
| 167 |
+
-0.11408621817827225,
|
| 168 |
+
0.939403235912323,
|
| 169 |
+
-0.15196871757507324,
|
| 170 |
+
0.565762460231781,
|
| 171 |
+
0.32600972056388855,
|
| 172 |
+
0.012888750061392784,
|
| 173 |
+
0.17190414667129517,
|
| 174 |
+
0.3495619595050812,
|
| 175 |
+
0.11379297822713852,
|
| 176 |
+
-0.8095759749412537,
|
| 177 |
+
0.03240593522787094,
|
| 178 |
+
0.9490123391151428,
|
| 179 |
+
0.14386720955371857,
|
| 180 |
+
0.7014500498771667
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.0579490028321743,
|
| 184 |
+
0.07013536244630814,
|
| 185 |
+
0.09934847801923752,
|
| 186 |
+
0.311722993850708,
|
| 187 |
+
0.13217779994010925,
|
| 188 |
+
0.23493210971355438,
|
| 189 |
+
0.22829721868038177,
|
| 190 |
+
0.08239603042602539,
|
| 191 |
+
0.1500341296195984,
|
| 192 |
+
0.85152268409729,
|
| 193 |
+
0.04813271760940552,
|
| 194 |
+
0.06810667365789413,
|
| 195 |
+
0.10921778529882431,
|
| 196 |
+
0.34190264344215393,
|
| 197 |
+
0.14661571383476257,
|
| 198 |
+
0.26654884219169617,
|
| 199 |
+
0.22192326188087463,
|
| 200 |
+
0.08192053437232971,
|
| 201 |
+
0.1472003310918808,
|
| 202 |
+
0.7158037424087524
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.559941291809082,
|
| 206 |
+
0.26086756587028503,
|
| 207 |
+
0.4504527747631073,
|
| 208 |
+
0.9999247789382935,
|
| 209 |
+
0.4198993146419525,
|
| 210 |
+
0.3512286841869354,
|
| 211 |
+
0.7522457242012024,
|
| 212 |
+
1.0,
|
| 213 |
+
0.8956095576286316,
|
| 214 |
+
1.470957636833191,
|
| 215 |
+
0.5437091588973999,
|
| 216 |
+
0.32627788186073303,
|
| 217 |
+
0.4945259988307953,
|
| 218 |
+
0.9998428821563721,
|
| 219 |
+
0.7737792730331421,
|
| 220 |
+
0.4633983373641968,
|
| 221 |
+
0.9018308520317078,
|
| 222 |
+
1.0,
|
| 223 |
+
0.9907073378562927,
|
| 224 |
+
1.361535668373108
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.16681896150112152,
|
| 228 |
+
-0.20499344170093536,
|
| 229 |
+
-0.0030731656588613987,
|
| 230 |
+
-0.4872298836708069,
|
| 231 |
+
-0.6995252966880798,
|
| 232 |
+
-0.999997615814209,
|
| 233 |
+
-0.988165020942688,
|
| 234 |
+
0.14152538776397705,
|
| 235 |
+
-0.8483264446258545,
|
| 236 |
+
-1.2196638584136963,
|
| 237 |
+
0.14598572254180908,
|
| 238 |
+
-0.2277291864156723,
|
| 239 |
+
0.004666368011385202,
|
| 240 |
+
-0.5699886679649353,
|
| 241 |
+
-0.40678924322128296,
|
| 242 |
+
-0.9999999403953552,
|
| 243 |
+
-0.6972882151603699,
|
| 244 |
+
0.13462646305561066,
|
| 245 |
+
-0.643044650554657,
|
| 246 |
+
-1.164451003074646
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.2053149801492691,
|
| 250 |
+
-0.17586381256580352,
|
| 251 |
+
0.015469378884881736,
|
| 252 |
+
-0.2516648331284523,
|
| 253 |
+
-0.5193796420097351,
|
| 254 |
+
-0.9995058274269104,
|
| 255 |
+
-0.7092818850278855,
|
| 256 |
+
0.608681161403656,
|
| 257 |
+
-0.578884813785553,
|
| 258 |
+
-1.1618710005283355,
|
| 259 |
+
0.21638940930366515,
|
| 260 |
+
-0.1691040216386318,
|
| 261 |
+
0.011891756923869252,
|
| 262 |
+
-0.29012590169906616,
|
| 263 |
+
-0.20126488715410232,
|
| 264 |
+
-0.9995589327812194,
|
| 265 |
+
-0.49963704913854595,
|
| 266 |
+
0.533765652179718,
|
| 267 |
+
-0.18726778730750085,
|
| 268 |
+
-1.082753186225891
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.5071819436550137,
|
| 272 |
+
0.165744510143995,
|
| 273 |
+
0.40272374808788297,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.16266889929771197,
|
| 276 |
+
0.09040380395948588,
|
| 277 |
+
0.5001266032457347,
|
| 278 |
+
0.9997656464576721,
|
| 279 |
+
0.10759550034999843,
|
| 280 |
+
1.4176189756393425,
|
| 281 |
+
0.47452601760625834,
|
| 282 |
+
0.1839943121373646,
|
| 283 |
+
0.40895662158727647,
|
| 284 |
+
0.995180070400238,
|
| 285 |
+
0.5622373461723318,
|
| 286 |
+
0.07441098906099738,
|
| 287 |
+
0.7114433652162524,
|
| 288 |
+
0.999856880903244,
|
| 289 |
+
0.5974926966428754,
|
| 290 |
+
1.321595377922058
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 11572,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_lift_box": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.3633340299129486,
|
| 322 |
+
-0.0188243817538023,
|
| 323 |
+
0.1790345162153244,
|
| 324 |
+
0.4083189070224762,
|
| 325 |
+
-0.11689117550849915,
|
| 326 |
+
-0.8073354959487915,
|
| 327 |
+
-0.10573232173919678,
|
| 328 |
+
0.9415333271026611,
|
| 329 |
+
-0.16247375309467316,
|
| 330 |
+
0.619253396987915,
|
| 331 |
+
0.36085841059684753,
|
| 332 |
+
0.013982076197862625,
|
| 333 |
+
0.20412708818912506,
|
| 334 |
+
0.5001599788665771,
|
| 335 |
+
0.11137505620718002,
|
| 336 |
+
-0.7415233254432678,
|
| 337 |
+
0.05212549492716789,
|
| 338 |
+
0.9483596682548523,
|
| 339 |
+
0.16254937648773193,
|
| 340 |
+
0.7590736150741577
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.0638059601187706,
|
| 344 |
+
0.06317954510450363,
|
| 345 |
+
0.11073724925518036,
|
| 346 |
+
0.31736424565315247,
|
| 347 |
+
0.13928908109664917,
|
| 348 |
+
0.21841377019882202,
|
| 349 |
+
0.22394296526908875,
|
| 350 |
+
0.08008279651403427,
|
| 351 |
+
0.1392110288143158,
|
| 352 |
+
0.7852216362953186,
|
| 353 |
+
0.056878820061683655,
|
| 354 |
+
0.059404969215393066,
|
| 355 |
+
0.1170634776353836,
|
| 356 |
+
0.3238432705402374,
|
| 357 |
+
0.14080214500427246,
|
| 358 |
+
0.25074857473373413,
|
| 359 |
+
0.2164432257413864,
|
| 360 |
+
0.07544830441474915,
|
| 361 |
+
0.1375824362039566,
|
| 362 |
+
0.651036262512207
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.5681452751159668,
|
| 366 |
+
0.2437673658132553,
|
| 367 |
+
0.45541316270828247,
|
| 368 |
+
0.9999293088912964,
|
| 369 |
+
0.523757815361023,
|
| 370 |
+
0.4592168927192688,
|
| 371 |
+
0.7756927013397217,
|
| 372 |
+
0.9999935030937195,
|
| 373 |
+
0.2805824279785156,
|
| 374 |
+
1.0,
|
| 375 |
+
0.5600330233573914,
|
| 376 |
+
0.3342031240463257,
|
| 377 |
+
0.4682213366031647,
|
| 378 |
+
0.9998393058776855,
|
| 379 |
+
0.7949740886688232,
|
| 380 |
+
0.1664249449968338,
|
| 381 |
+
0.9131186604499817,
|
| 382 |
+
0.9999967813491821,
|
| 383 |
+
0.7936055064201355,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1517709195613861,
|
| 388 |
+
-0.2900286316871643,
|
| 389 |
+
-0.07412093877792358,
|
| 390 |
+
-0.4022133946418762,
|
| 391 |
+
-0.7361933588981628,
|
| 392 |
+
-0.9999988079071045,
|
| 393 |
+
-0.9935019016265869,
|
| 394 |
+
0.10709662735462189,
|
| 395 |
+
-0.8023554682731628,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.15366072952747345,
|
| 398 |
+
-0.23686714470386505,
|
| 399 |
+
0.0008372184820473194,
|
| 400 |
+
-0.5509981513023376,
|
| 401 |
+
-0.35234102606773376,
|
| 402 |
+
-0.999956488609314,
|
| 403 |
+
-0.5318384766578674,
|
| 404 |
+
0.3388061225414276,
|
| 405 |
+
-0.27330997586250305,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.21054682105779648,
|
| 410 |
+
-0.1866426882147789,
|
| 411 |
+
0.008138886513188495,
|
| 412 |
+
-0.19710821226239203,
|
| 413 |
+
-0.5368945515155792,
|
| 414 |
+
-0.9981186389923096,
|
| 415 |
+
-0.6956261324882507,
|
| 416 |
+
0.6267582887411117,
|
| 417 |
+
-0.5600040704011917,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.2190245844423771,
|
| 420 |
+
-0.15968348175287247,
|
| 421 |
+
0.025033411756157874,
|
| 422 |
+
-0.23832830414175987,
|
| 423 |
+
-0.2097599548101425,
|
| 424 |
+
-0.9988620406389237,
|
| 425 |
+
-0.4039672353863716,
|
| 426 |
+
0.6080100274085999,
|
| 427 |
+
-0.19206354618072508,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.5033414244651794,
|
| 432 |
+
0.16928535521030416,
|
| 433 |
+
0.41566276580095285,
|
| 434 |
+
0.9899059218168258,
|
| 435 |
+
0.15462822496891018,
|
| 436 |
+
0.03764873944222882,
|
| 437 |
+
0.4657947558164549,
|
| 438 |
+
0.9995575082302094,
|
| 439 |
+
0.12326683558523567,
|
| 440 |
+
1.0,
|
| 441 |
+
0.4801343524456022,
|
| 442 |
+
0.1795493066310881,
|
| 443 |
+
0.4235989159345625,
|
| 444 |
+
0.9913575077056883,
|
| 445 |
+
0.5356137681007356,
|
| 446 |
+
0.044951977618036626,
|
| 447 |
+
0.7084567189216593,
|
| 448 |
+
0.9992782145738601,
|
| 449 |
+
0.6150667482614517,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.3302275836467743,
|
| 478 |
+
-0.022152910009026527,
|
| 479 |
+
0.15129828453063965,
|
| 480 |
+
0.2713927626609802,
|
| 481 |
+
-0.12320414930582047,
|
| 482 |
+
-0.8610084652900696,
|
| 483 |
+
-0.11408621817827225,
|
| 484 |
+
0.939403235912323,
|
| 485 |
+
-0.15196871757507324,
|
| 486 |
+
0.565762460231781,
|
| 487 |
+
0.32600972056388855,
|
| 488 |
+
0.012888750061392784,
|
| 489 |
+
0.17190414667129517,
|
| 490 |
+
0.3495619595050812,
|
| 491 |
+
0.11379297822713852,
|
| 492 |
+
-0.8095759749412537,
|
| 493 |
+
0.03240593522787094,
|
| 494 |
+
0.9490123391151428,
|
| 495 |
+
0.14386720955371857,
|
| 496 |
+
0.7014500498771667
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.0579490028321743,
|
| 500 |
+
0.07013536244630814,
|
| 501 |
+
0.09934847801923752,
|
| 502 |
+
0.311722993850708,
|
| 503 |
+
0.13217779994010925,
|
| 504 |
+
0.23493210971355438,
|
| 505 |
+
0.22829721868038177,
|
| 506 |
+
0.08239603042602539,
|
| 507 |
+
0.1500341296195984,
|
| 508 |
+
0.85152268409729,
|
| 509 |
+
0.04813271760940552,
|
| 510 |
+
0.06810667365789413,
|
| 511 |
+
0.10921778529882431,
|
| 512 |
+
0.34190264344215393,
|
| 513 |
+
0.14661571383476257,
|
| 514 |
+
0.26654884219169617,
|
| 515 |
+
0.22192326188087463,
|
| 516 |
+
0.08192053437232971,
|
| 517 |
+
0.1472003310918808,
|
| 518 |
+
0.7158037424087524
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.559941291809082,
|
| 522 |
+
0.26086756587028503,
|
| 523 |
+
0.4504527747631073,
|
| 524 |
+
0.9999247789382935,
|
| 525 |
+
0.4198993146419525,
|
| 526 |
+
0.3512286841869354,
|
| 527 |
+
0.7522457242012024,
|
| 528 |
+
1.0,
|
| 529 |
+
0.8956095576286316,
|
| 530 |
+
1.470957636833191,
|
| 531 |
+
0.5437091588973999,
|
| 532 |
+
0.32627788186073303,
|
| 533 |
+
0.4945259988307953,
|
| 534 |
+
0.9998428821563721,
|
| 535 |
+
0.7737792730331421,
|
| 536 |
+
0.4633983373641968,
|
| 537 |
+
0.9018308520317078,
|
| 538 |
+
1.0,
|
| 539 |
+
0.9907073378562927,
|
| 540 |
+
1.361535668373108
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.16681896150112152,
|
| 544 |
+
-0.20499344170093536,
|
| 545 |
+
-0.0030731656588613987,
|
| 546 |
+
-0.4872298836708069,
|
| 547 |
+
-0.6995252966880798,
|
| 548 |
+
-0.999997615814209,
|
| 549 |
+
-0.988165020942688,
|
| 550 |
+
0.14152538776397705,
|
| 551 |
+
-0.8483264446258545,
|
| 552 |
+
-1.2196638584136963,
|
| 553 |
+
0.14598572254180908,
|
| 554 |
+
-0.2277291864156723,
|
| 555 |
+
0.004666368011385202,
|
| 556 |
+
-0.5699886679649353,
|
| 557 |
+
-0.40678924322128296,
|
| 558 |
+
-0.9999999403953552,
|
| 559 |
+
-0.6972882151603699,
|
| 560 |
+
0.13462646305561066,
|
| 561 |
+
-0.643044650554657,
|
| 562 |
+
-1.164451003074646
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.2053149801492691,
|
| 566 |
+
-0.17586381256580352,
|
| 567 |
+
0.015469378884881736,
|
| 568 |
+
-0.2516648331284523,
|
| 569 |
+
-0.5193796420097351,
|
| 570 |
+
-0.9995058274269104,
|
| 571 |
+
-0.7092818850278855,
|
| 572 |
+
0.608681161403656,
|
| 573 |
+
-0.578884813785553,
|
| 574 |
+
-1.1618710005283355,
|
| 575 |
+
0.21638940930366515,
|
| 576 |
+
-0.1691040216386318,
|
| 577 |
+
0.011891756923869252,
|
| 578 |
+
-0.29012590169906616,
|
| 579 |
+
-0.20126488715410232,
|
| 580 |
+
-0.9995589327812194,
|
| 581 |
+
-0.49963704913854595,
|
| 582 |
+
0.533765652179718,
|
| 583 |
+
-0.18726778730750085,
|
| 584 |
+
-1.082753186225891
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.5071819436550137,
|
| 588 |
+
0.165744510143995,
|
| 589 |
+
0.40272374808788297,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.16266889929771197,
|
| 592 |
+
0.09040380395948588,
|
| 593 |
+
0.5001266032457347,
|
| 594 |
+
0.9997656464576721,
|
| 595 |
+
0.10759550034999843,
|
| 596 |
+
1.4176189756393425,
|
| 597 |
+
0.47452601760625834,
|
| 598 |
+
0.1839943121373646,
|
| 599 |
+
0.40895662158727647,
|
| 600 |
+
0.995180070400238,
|
| 601 |
+
0.5622373461723318,
|
| 602 |
+
0.07441098906099738,
|
| 603 |
+
0.7114433652162524,
|
| 604 |
+
0.999856880903244,
|
| 605 |
+
0.5974926966428754,
|
| 606 |
+
1.321595377922058
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 11572,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_lift_box/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd03a177af8c0205abb9cb9d4e871bf3cab1238e469e9b7315941232a26e68a6
|
| 3 |
+
size 2889536104
|
1e-4/twinvla-scratch-1e-4-aloha_lift_box/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_shoes_table/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_shoes_table/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_shoes_table_new": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.3260679841041565,
|
| 6 |
+
-0.03153973072767258,
|
| 7 |
+
0.17551672458648682,
|
| 8 |
+
0.341669499874115,
|
| 9 |
+
0.1495978981256485,
|
| 10 |
+
-0.7719317078590393,
|
| 11 |
+
-0.7661705613136292,
|
| 12 |
+
0.3032568395137787,
|
| 13 |
+
-0.1495625078678131,
|
| 14 |
+
0.11935558915138245,
|
| 15 |
+
0.34630629420280457,
|
| 16 |
+
0.037363290786743164,
|
| 17 |
+
0.18034809827804565,
|
| 18 |
+
0.3275623619556427,
|
| 19 |
+
0.12991519272327423,
|
| 20 |
+
-0.831997811794281,
|
| 21 |
+
-0.7951440811157227,
|
| 22 |
+
0.25535571575164795,
|
| 23 |
+
-0.18214371800422668,
|
| 24 |
+
0.19820308685302734
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.0646045058965683,
|
| 28 |
+
0.09588947147130966,
|
| 29 |
+
0.10945845395326614,
|
| 30 |
+
0.3783091902732849,
|
| 31 |
+
0.1452837437391281,
|
| 32 |
+
0.3174603581428528,
|
| 33 |
+
0.3114127218723297,
|
| 34 |
+
0.417682021856308,
|
| 35 |
+
0.16497297585010529,
|
| 36 |
+
0.9928225874900818,
|
| 37 |
+
0.07045891135931015,
|
| 38 |
+
0.10143465548753738,
|
| 39 |
+
0.11245165765285492,
|
| 40 |
+
0.3225674331188202,
|
| 41 |
+
0.15707552433013916,
|
| 42 |
+
0.2342674732208252,
|
| 43 |
+
0.2608211040496826,
|
| 44 |
+
0.40032699704170227,
|
| 45 |
+
0.2026672214269638,
|
| 46 |
+
0.9801287651062012
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.48015326261520386,
|
| 50 |
+
0.179313525557518,
|
| 51 |
+
0.3523038923740387,
|
| 52 |
+
0.9999800324440002,
|
| 53 |
+
0.7157489657402039,
|
| 54 |
+
0.11180483549833298,
|
| 55 |
+
0.1242646798491478,
|
| 56 |
+
0.9998366832733154,
|
| 57 |
+
0.285250186920166,
|
| 58 |
+
1.0,
|
| 59 |
+
0.49179938435554504,
|
| 60 |
+
0.3557826578617096,
|
| 61 |
+
0.42447179555892944,
|
| 62 |
+
0.9987993836402893,
|
| 63 |
+
0.5477575659751892,
|
| 64 |
+
0.05208699405193329,
|
| 65 |
+
0.034653306007385254,
|
| 66 |
+
0.9937106966972351,
|
| 67 |
+
0.3852289915084839,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.12527647614479065,
|
| 72 |
+
-0.31394532322883606,
|
| 73 |
+
-0.04988693445920944,
|
| 74 |
+
-0.2561202347278595,
|
| 75 |
+
-0.30035507678985596,
|
| 76 |
+
-0.999981164932251,
|
| 77 |
+
-0.9999915957450867,
|
| 78 |
+
-0.644327700138092,
|
| 79 |
+
-0.7897446751594543,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.17444398999214172,
|
| 82 |
+
-0.23238857090473175,
|
| 83 |
+
-0.0659869983792305,
|
| 84 |
+
-0.35028380155563354,
|
| 85 |
+
-0.3673132658004761,
|
| 86 |
+
-0.9999988079071045,
|
| 87 |
+
-0.9999988675117493,
|
| 88 |
+
-0.7761710286140442,
|
| 89 |
+
-0.9717934131622314,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.1924597442150116,
|
| 94 |
+
-0.23709256052970887,
|
| 95 |
+
-0.031008305028080944,
|
| 96 |
+
-0.15678457915782928,
|
| 97 |
+
-0.1863800033926964,
|
| 98 |
+
-0.9994285225868225,
|
| 99 |
+
-0.9997011423110962,
|
| 100 |
+
-0.5719999492168426,
|
| 101 |
+
-0.6091587543487549,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.20810787677764891,
|
| 104 |
+
-0.16556282192468644,
|
| 105 |
+
-0.017654908634722234,
|
| 106 |
+
-0.15096036493778228,
|
| 107 |
+
-0.22608168572187423,
|
| 108 |
+
-0.998928040266037,
|
| 109 |
+
-0.9990629017353058,
|
| 110 |
+
-0.5276546537876129,
|
| 111 |
+
-0.7234344184398651,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.457787150144577,
|
| 116 |
+
0.1451936572790145,
|
| 117 |
+
0.34190638065338136,
|
| 118 |
+
0.9928598761558532,
|
| 119 |
+
0.5035569965839384,
|
| 120 |
+
-0.05978704243898392,
|
| 121 |
+
-0.01781813446432354,
|
| 122 |
+
0.9893046200275422,
|
| 123 |
+
0.17466527372598611,
|
| 124 |
+
1.0,
|
| 125 |
+
0.4768679320812225,
|
| 126 |
+
0.2598331540822982,
|
| 127 |
+
0.39134971201419827,
|
| 128 |
+
0.9723170220851898,
|
| 129 |
+
0.47033962905406923,
|
| 130 |
+
-0.1864572197198868,
|
| 131 |
+
-0.060312222316861244,
|
| 132 |
+
0.9807472229003906,
|
| 133 |
+
0.21941211968660337,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.32339081168174744,
|
| 162 |
+
-0.03213495388627052,
|
| 163 |
+
0.16396664083003998,
|
| 164 |
+
0.3156684637069702,
|
| 165 |
+
0.1496039181947708,
|
| 166 |
+
-0.7826846837997437,
|
| 167 |
+
-0.7692199945449829,
|
| 168 |
+
0.30184611678123474,
|
| 169 |
+
-0.12293443828821182,
|
| 170 |
+
0.6964511275291443,
|
| 171 |
+
0.34235236048698425,
|
| 172 |
+
0.03810478746891022,
|
| 173 |
+
0.16778817772865295,
|
| 174 |
+
0.29598966240882874,
|
| 175 |
+
0.12479892373085022,
|
| 176 |
+
-0.8384969234466553,
|
| 177 |
+
-0.7965013384819031,
|
| 178 |
+
0.2529187500476837,
|
| 179 |
+
-0.1519845873117447,
|
| 180 |
+
0.7593688368797302
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.062407124787569046,
|
| 184 |
+
0.09441280364990234,
|
| 185 |
+
0.10514319688081741,
|
| 186 |
+
0.3824003040790558,
|
| 187 |
+
0.14718197286128998,
|
| 188 |
+
0.3122296929359436,
|
| 189 |
+
0.3191576600074768,
|
| 190 |
+
0.41190221905708313,
|
| 191 |
+
0.1747966706752777,
|
| 192 |
+
0.3845253884792328,
|
| 193 |
+
0.06875813007354736,
|
| 194 |
+
0.09827680140733719,
|
| 195 |
+
0.10889745503664017,
|
| 196 |
+
0.33462250232696533,
|
| 197 |
+
0.15245455503463745,
|
| 198 |
+
0.24192583560943604,
|
| 199 |
+
0.2739580273628235,
|
| 200 |
+
0.40077677369117737,
|
| 201 |
+
0.20697632431983948,
|
| 202 |
+
0.3062511384487152
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.4779528081417084,
|
| 206 |
+
0.17402252554893494,
|
| 207 |
+
0.412266343832016,
|
| 208 |
+
0.9999450445175171,
|
| 209 |
+
0.6999034285545349,
|
| 210 |
+
0.4170636534690857,
|
| 211 |
+
0.4215781092643738,
|
| 212 |
+
1.0,
|
| 213 |
+
0.932714581489563,
|
| 214 |
+
1.3076640367507935,
|
| 215 |
+
0.48697036504745483,
|
| 216 |
+
0.34565815329551697,
|
| 217 |
+
0.415988564491272,
|
| 218 |
+
0.9998390078544617,
|
| 219 |
+
0.5593472123146057,
|
| 220 |
+
0.20724913477897644,
|
| 221 |
+
0.26142606139183044,
|
| 222 |
+
1.0,
|
| 223 |
+
0.5777683854103088,
|
| 224 |
+
1.3103067874908447
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.12448469549417496,
|
| 228 |
+
-0.30521926283836365,
|
| 229 |
+
-0.004976626019924879,
|
| 230 |
+
-0.21920020878314972,
|
| 231 |
+
-0.5096501708030701,
|
| 232 |
+
-1.0,
|
| 233 |
+
-0.9999982118606567,
|
| 234 |
+
-0.6262368559837341,
|
| 235 |
+
-0.7456304430961609,
|
| 236 |
+
-1.1091713905334473,
|
| 237 |
+
0.1249726265668869,
|
| 238 |
+
-0.22351478040218353,
|
| 239 |
+
-0.006724653299897909,
|
| 240 |
+
-0.36625856161117554,
|
| 241 |
+
-0.4249938726425171,
|
| 242 |
+
-0.9999956488609314,
|
| 243 |
+
-0.999992847442627,
|
| 244 |
+
-0.77183997631073,
|
| 245 |
+
-0.9583328366279602,
|
| 246 |
+
-1.04777991771698
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.19817760735750198,
|
| 250 |
+
-0.2323527842760086,
|
| 251 |
+
-0.004393648169934749,
|
| 252 |
+
-0.14680973589420318,
|
| 253 |
+
-0.1899831309914589,
|
| 254 |
+
-0.9998269140720367,
|
| 255 |
+
-0.9998353064060211,
|
| 256 |
+
-0.49930458664894106,
|
| 257 |
+
-0.5963611721992492,
|
| 258 |
+
-1.0807034492492675,
|
| 259 |
+
0.20821888744831085,
|
| 260 |
+
-0.13953636586666107,
|
| 261 |
+
-0.0033576888265088203,
|
| 262 |
+
-0.1788107320666313,
|
| 263 |
+
-0.22050866037607195,
|
| 264 |
+
-0.999310964345932,
|
| 265 |
+
-0.9993988335132599,
|
| 266 |
+
-0.5106797099113465,
|
| 267 |
+
-0.7302295982837677,
|
| 268 |
+
0.05842937603592872
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.44812404513359066,
|
| 272 |
+
0.14210240542888639,
|
| 273 |
+
0.337252739071846,
|
| 274 |
+
0.9943239092826842,
|
| 275 |
+
0.5118523061275481,
|
| 276 |
+
0.031205366365610953,
|
| 277 |
+
0.04714705012738701,
|
| 278 |
+
0.992770653963089,
|
| 279 |
+
0.18282963484525644,
|
| 280 |
+
1.1270769238471985,
|
| 281 |
+
0.47021201252937317,
|
| 282 |
+
0.2550090014934537,
|
| 283 |
+
0.3824465185403823,
|
| 284 |
+
0.995180070400238,
|
| 285 |
+
0.46117363572120657,
|
| 286 |
+
-0.047424964234233064,
|
| 287 |
+
-3.7679112665500725e-06,
|
| 288 |
+
0.9869830250740051,
|
| 289 |
+
0.23266565054655072,
|
| 290 |
+
1.1919615149497986
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 12911,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_shoes_table": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.3260679841041565,
|
| 322 |
+
-0.03153973072767258,
|
| 323 |
+
0.17551672458648682,
|
| 324 |
+
0.341669499874115,
|
| 325 |
+
0.1495978981256485,
|
| 326 |
+
-0.7719317078590393,
|
| 327 |
+
-0.7661705613136292,
|
| 328 |
+
0.3032568395137787,
|
| 329 |
+
-0.1495625078678131,
|
| 330 |
+
0.11935558915138245,
|
| 331 |
+
0.34630629420280457,
|
| 332 |
+
0.037363290786743164,
|
| 333 |
+
0.18034809827804565,
|
| 334 |
+
0.3275623619556427,
|
| 335 |
+
0.12991519272327423,
|
| 336 |
+
-0.831997811794281,
|
| 337 |
+
-0.7951440811157227,
|
| 338 |
+
0.25535571575164795,
|
| 339 |
+
-0.18214371800422668,
|
| 340 |
+
0.19820308685302734
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.0646045058965683,
|
| 344 |
+
0.09588947147130966,
|
| 345 |
+
0.10945845395326614,
|
| 346 |
+
0.3783091902732849,
|
| 347 |
+
0.1452837437391281,
|
| 348 |
+
0.3174603581428528,
|
| 349 |
+
0.3114127218723297,
|
| 350 |
+
0.417682021856308,
|
| 351 |
+
0.16497297585010529,
|
| 352 |
+
0.9928225874900818,
|
| 353 |
+
0.07045891135931015,
|
| 354 |
+
0.10143465548753738,
|
| 355 |
+
0.11245165765285492,
|
| 356 |
+
0.3225674331188202,
|
| 357 |
+
0.15707552433013916,
|
| 358 |
+
0.2342674732208252,
|
| 359 |
+
0.2608211040496826,
|
| 360 |
+
0.40032699704170227,
|
| 361 |
+
0.2026672214269638,
|
| 362 |
+
0.9801287651062012
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.48015326261520386,
|
| 366 |
+
0.179313525557518,
|
| 367 |
+
0.3523038923740387,
|
| 368 |
+
0.9999800324440002,
|
| 369 |
+
0.7157489657402039,
|
| 370 |
+
0.11180483549833298,
|
| 371 |
+
0.1242646798491478,
|
| 372 |
+
0.9998366832733154,
|
| 373 |
+
0.285250186920166,
|
| 374 |
+
1.0,
|
| 375 |
+
0.49179938435554504,
|
| 376 |
+
0.3557826578617096,
|
| 377 |
+
0.42447179555892944,
|
| 378 |
+
0.9987993836402893,
|
| 379 |
+
0.5477575659751892,
|
| 380 |
+
0.05208699405193329,
|
| 381 |
+
0.034653306007385254,
|
| 382 |
+
0.9937106966972351,
|
| 383 |
+
0.3852289915084839,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.12527647614479065,
|
| 388 |
+
-0.31394532322883606,
|
| 389 |
+
-0.04988693445920944,
|
| 390 |
+
-0.2561202347278595,
|
| 391 |
+
-0.30035507678985596,
|
| 392 |
+
-0.999981164932251,
|
| 393 |
+
-0.9999915957450867,
|
| 394 |
+
-0.644327700138092,
|
| 395 |
+
-0.7897446751594543,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.17444398999214172,
|
| 398 |
+
-0.23238857090473175,
|
| 399 |
+
-0.0659869983792305,
|
| 400 |
+
-0.35028380155563354,
|
| 401 |
+
-0.3673132658004761,
|
| 402 |
+
-0.9999988079071045,
|
| 403 |
+
-0.9999988675117493,
|
| 404 |
+
-0.7761710286140442,
|
| 405 |
+
-0.9717934131622314,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.1924597442150116,
|
| 410 |
+
-0.23709256052970887,
|
| 411 |
+
-0.031008305028080944,
|
| 412 |
+
-0.15678457915782928,
|
| 413 |
+
-0.1863800033926964,
|
| 414 |
+
-0.9994285225868225,
|
| 415 |
+
-0.9997011423110962,
|
| 416 |
+
-0.5719999492168426,
|
| 417 |
+
-0.6091587543487549,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.20810787677764891,
|
| 420 |
+
-0.16556282192468644,
|
| 421 |
+
-0.017654908634722234,
|
| 422 |
+
-0.15096036493778228,
|
| 423 |
+
-0.22608168572187423,
|
| 424 |
+
-0.998928040266037,
|
| 425 |
+
-0.9990629017353058,
|
| 426 |
+
-0.5276546537876129,
|
| 427 |
+
-0.7234344184398651,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.457787150144577,
|
| 432 |
+
0.1451936572790145,
|
| 433 |
+
0.34190638065338136,
|
| 434 |
+
0.9928598761558532,
|
| 435 |
+
0.5035569965839384,
|
| 436 |
+
-0.05978704243898392,
|
| 437 |
+
-0.01781813446432354,
|
| 438 |
+
0.9893046200275422,
|
| 439 |
+
0.17466527372598611,
|
| 440 |
+
1.0,
|
| 441 |
+
0.4768679320812225,
|
| 442 |
+
0.2598331540822982,
|
| 443 |
+
0.39134971201419827,
|
| 444 |
+
0.9723170220851898,
|
| 445 |
+
0.47033962905406923,
|
| 446 |
+
-0.1864572197198868,
|
| 447 |
+
-0.060312222316861244,
|
| 448 |
+
0.9807472229003906,
|
| 449 |
+
0.21941211968660337,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.32339081168174744,
|
| 478 |
+
-0.03213495388627052,
|
| 479 |
+
0.16396664083003998,
|
| 480 |
+
0.3156684637069702,
|
| 481 |
+
0.1496039181947708,
|
| 482 |
+
-0.7826846837997437,
|
| 483 |
+
-0.7692199945449829,
|
| 484 |
+
0.30184611678123474,
|
| 485 |
+
-0.12293443828821182,
|
| 486 |
+
0.6964511275291443,
|
| 487 |
+
0.34235236048698425,
|
| 488 |
+
0.03810478746891022,
|
| 489 |
+
0.16778817772865295,
|
| 490 |
+
0.29598966240882874,
|
| 491 |
+
0.12479892373085022,
|
| 492 |
+
-0.8384969234466553,
|
| 493 |
+
-0.7965013384819031,
|
| 494 |
+
0.2529187500476837,
|
| 495 |
+
-0.1519845873117447,
|
| 496 |
+
0.7593688368797302
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.062407124787569046,
|
| 500 |
+
0.09441280364990234,
|
| 501 |
+
0.10514319688081741,
|
| 502 |
+
0.3824003040790558,
|
| 503 |
+
0.14718197286128998,
|
| 504 |
+
0.3122296929359436,
|
| 505 |
+
0.3191576600074768,
|
| 506 |
+
0.41190221905708313,
|
| 507 |
+
0.1747966706752777,
|
| 508 |
+
0.3845253884792328,
|
| 509 |
+
0.06875813007354736,
|
| 510 |
+
0.09827680140733719,
|
| 511 |
+
0.10889745503664017,
|
| 512 |
+
0.33462250232696533,
|
| 513 |
+
0.15245455503463745,
|
| 514 |
+
0.24192583560943604,
|
| 515 |
+
0.2739580273628235,
|
| 516 |
+
0.40077677369117737,
|
| 517 |
+
0.20697632431983948,
|
| 518 |
+
0.3062511384487152
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.4779528081417084,
|
| 522 |
+
0.17402252554893494,
|
| 523 |
+
0.412266343832016,
|
| 524 |
+
0.9999450445175171,
|
| 525 |
+
0.6999034285545349,
|
| 526 |
+
0.4170636534690857,
|
| 527 |
+
0.4215781092643738,
|
| 528 |
+
1.0,
|
| 529 |
+
0.932714581489563,
|
| 530 |
+
1.3076640367507935,
|
| 531 |
+
0.48697036504745483,
|
| 532 |
+
0.34565815329551697,
|
| 533 |
+
0.415988564491272,
|
| 534 |
+
0.9998390078544617,
|
| 535 |
+
0.5593472123146057,
|
| 536 |
+
0.20724913477897644,
|
| 537 |
+
0.26142606139183044,
|
| 538 |
+
1.0,
|
| 539 |
+
0.5777683854103088,
|
| 540 |
+
1.3103067874908447
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.12448469549417496,
|
| 544 |
+
-0.30521926283836365,
|
| 545 |
+
-0.004976626019924879,
|
| 546 |
+
-0.21920020878314972,
|
| 547 |
+
-0.5096501708030701,
|
| 548 |
+
-1.0,
|
| 549 |
+
-0.9999982118606567,
|
| 550 |
+
-0.6262368559837341,
|
| 551 |
+
-0.7456304430961609,
|
| 552 |
+
-1.1091713905334473,
|
| 553 |
+
0.1249726265668869,
|
| 554 |
+
-0.22351478040218353,
|
| 555 |
+
-0.006724653299897909,
|
| 556 |
+
-0.36625856161117554,
|
| 557 |
+
-0.4249938726425171,
|
| 558 |
+
-0.9999956488609314,
|
| 559 |
+
-0.999992847442627,
|
| 560 |
+
-0.77183997631073,
|
| 561 |
+
-0.9583328366279602,
|
| 562 |
+
-1.04777991771698
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.19817760735750198,
|
| 566 |
+
-0.2323527842760086,
|
| 567 |
+
-0.004393648169934749,
|
| 568 |
+
-0.14680973589420318,
|
| 569 |
+
-0.1899831309914589,
|
| 570 |
+
-0.9998269140720367,
|
| 571 |
+
-0.9998353064060211,
|
| 572 |
+
-0.49930458664894106,
|
| 573 |
+
-0.5963611721992492,
|
| 574 |
+
-1.0807034492492675,
|
| 575 |
+
0.20821888744831085,
|
| 576 |
+
-0.13953636586666107,
|
| 577 |
+
-0.0033576888265088203,
|
| 578 |
+
-0.1788107320666313,
|
| 579 |
+
-0.22050866037607195,
|
| 580 |
+
-0.999310964345932,
|
| 581 |
+
-0.9993988335132599,
|
| 582 |
+
-0.5106797099113465,
|
| 583 |
+
-0.7302295982837677,
|
| 584 |
+
0.05842937603592872
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.44812404513359066,
|
| 588 |
+
0.14210240542888639,
|
| 589 |
+
0.337252739071846,
|
| 590 |
+
0.9943239092826842,
|
| 591 |
+
0.5118523061275481,
|
| 592 |
+
0.031205366365610953,
|
| 593 |
+
0.04714705012738701,
|
| 594 |
+
0.992770653963089,
|
| 595 |
+
0.18282963484525644,
|
| 596 |
+
1.1270769238471985,
|
| 597 |
+
0.47021201252937317,
|
| 598 |
+
0.2550090014934537,
|
| 599 |
+
0.3824465185403823,
|
| 600 |
+
0.995180070400238,
|
| 601 |
+
0.46117363572120657,
|
| 602 |
+
-0.047424964234233064,
|
| 603 |
+
-3.7679112665500725e-06,
|
| 604 |
+
0.9869830250740051,
|
| 605 |
+
0.23266565054655072,
|
| 606 |
+
1.1919615149497986
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 12911,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
1e-4/twinvla-scratch-1e-4-aloha_shoes_table/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03fd044ea6df68237b7ac32e1e28050faa5813813df1357668cba2b59a5e8146
|
| 3 |
+
size 2889536104
|
1e-4/twinvla-scratch-1e-4-aloha_shoes_table/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
2e-5/twinvla-aloha_shoes_table/config.json
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": false,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": "428b4d21376ff21d70b8b8830db6f6ab3907bfd8",
|
| 28 |
+
"_name_or_path": "jellyho/TwinVLA",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "false",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"dataset_statistics_path": null,
|
| 45 |
+
"decoder_start_token_id": null,
|
| 46 |
+
"denoiser": "FM",
|
| 47 |
+
"diffusion_batch": 32,
|
| 48 |
+
"dit_size": "DiT-B",
|
| 49 |
+
"diversity_penalty": 0.0,
|
| 50 |
+
"do_sample": false,
|
| 51 |
+
"downsample_ratio": 0.5,
|
| 52 |
+
"dynamic_image_size": true,
|
| 53 |
+
"early_stopping": false,
|
| 54 |
+
"efficient_loss": true,
|
| 55 |
+
"enable_cfg": true,
|
| 56 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 57 |
+
"eos_token_id": null,
|
| 58 |
+
"exponential_decay_length_penalty": null,
|
| 59 |
+
"finetuning_task": null,
|
| 60 |
+
"force_image_size": 448,
|
| 61 |
+
"forced_bos_token_id": null,
|
| 62 |
+
"forced_eos_token_id": null,
|
| 63 |
+
"global_normalization": true,
|
| 64 |
+
"hz_interpolate": 20,
|
| 65 |
+
"id2label": {
|
| 66 |
+
"0": "LABEL_0",
|
| 67 |
+
"1": "LABEL_1"
|
| 68 |
+
},
|
| 69 |
+
"image_size": 224,
|
| 70 |
+
"interpolate_gripper": false,
|
| 71 |
+
"is_decoder": false,
|
| 72 |
+
"is_encoder_decoder": false,
|
| 73 |
+
"keep_aspect_ratio": false,
|
| 74 |
+
"knowledge_insulation": false,
|
| 75 |
+
"label2id": {
|
| 76 |
+
"LABEL_0": 0,
|
| 77 |
+
"LABEL_1": 1
|
| 78 |
+
},
|
| 79 |
+
"length_penalty": 1.0,
|
| 80 |
+
"llm_config": {
|
| 81 |
+
"_attn_implementation_autoset": true,
|
| 82 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 83 |
+
"add_cross_attention": false,
|
| 84 |
+
"architectures": [
|
| 85 |
+
"Qwen2ForCausalLM"
|
| 86 |
+
],
|
| 87 |
+
"attention_dropout": 0.0,
|
| 88 |
+
"auto_map": {
|
| 89 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 90 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 91 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 92 |
+
},
|
| 93 |
+
"bad_words_ids": null,
|
| 94 |
+
"begin_suppress_tokens": null,
|
| 95 |
+
"bos_token_id": 151643,
|
| 96 |
+
"chunk_size_feed_forward": 0,
|
| 97 |
+
"cross_attention_hidden_size": null,
|
| 98 |
+
"decoder_start_token_id": null,
|
| 99 |
+
"diversity_penalty": 0.0,
|
| 100 |
+
"do_sample": false,
|
| 101 |
+
"early_stopping": false,
|
| 102 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 103 |
+
"eos_token_id": 151645,
|
| 104 |
+
"exponential_decay_length_penalty": null,
|
| 105 |
+
"finetuning_task": null,
|
| 106 |
+
"forced_bos_token_id": null,
|
| 107 |
+
"forced_eos_token_id": null,
|
| 108 |
+
"hidden_act": "silu",
|
| 109 |
+
"hidden_size": 896,
|
| 110 |
+
"id2label": {
|
| 111 |
+
"0": "LABEL_0",
|
| 112 |
+
"1": "LABEL_1"
|
| 113 |
+
},
|
| 114 |
+
"initializer_range": 0.02,
|
| 115 |
+
"intermediate_size": 4864,
|
| 116 |
+
"is_decoder": false,
|
| 117 |
+
"is_encoder_decoder": false,
|
| 118 |
+
"label2id": {
|
| 119 |
+
"LABEL_0": 0,
|
| 120 |
+
"LABEL_1": 1
|
| 121 |
+
},
|
| 122 |
+
"length_penalty": 1.0,
|
| 123 |
+
"max_length": 20,
|
| 124 |
+
"max_position_embeddings": 32768,
|
| 125 |
+
"max_window_layers": 21,
|
| 126 |
+
"min_length": 0,
|
| 127 |
+
"model_type": "qwen2",
|
| 128 |
+
"no_repeat_ngram_size": 0,
|
| 129 |
+
"num_attention_heads": 14,
|
| 130 |
+
"num_beam_groups": 1,
|
| 131 |
+
"num_beams": 1,
|
| 132 |
+
"num_hidden_layers": 24,
|
| 133 |
+
"num_key_value_heads": 2,
|
| 134 |
+
"num_return_sequences": 1,
|
| 135 |
+
"output_attentions": false,
|
| 136 |
+
"output_hidden_states": false,
|
| 137 |
+
"output_scores": false,
|
| 138 |
+
"pad_token_id": null,
|
| 139 |
+
"prefix": null,
|
| 140 |
+
"problem_type": null,
|
| 141 |
+
"pruned_heads": {},
|
| 142 |
+
"remove_invalid_values": false,
|
| 143 |
+
"repetition_penalty": 1.0,
|
| 144 |
+
"return_dict": true,
|
| 145 |
+
"return_dict_in_generate": false,
|
| 146 |
+
"rms_norm_eps": 1e-06,
|
| 147 |
+
"rope_scaling": null,
|
| 148 |
+
"rope_theta": 1000000.0,
|
| 149 |
+
"sep_token_id": null,
|
| 150 |
+
"sliding_window": 32768,
|
| 151 |
+
"suppress_tokens": null,
|
| 152 |
+
"task_specific_params": null,
|
| 153 |
+
"temperature": 1.0,
|
| 154 |
+
"tf_legacy_loss": false,
|
| 155 |
+
"tie_encoder_decoder": false,
|
| 156 |
+
"tie_word_embeddings": true,
|
| 157 |
+
"tokenizer_class": null,
|
| 158 |
+
"top_k": 50,
|
| 159 |
+
"top_p": 1.0,
|
| 160 |
+
"torch_dtype": "bfloat16",
|
| 161 |
+
"torchscript": false,
|
| 162 |
+
"transformers_version": "4.50.0.dev0",
|
| 163 |
+
"typical_p": 1.0,
|
| 164 |
+
"use_bfloat16": false,
|
| 165 |
+
"use_cache": false,
|
| 166 |
+
"use_sliding_window": false,
|
| 167 |
+
"vocab_size": 151674
|
| 168 |
+
},
|
| 169 |
+
"loss_version": "v4",
|
| 170 |
+
"max_dynamic_patch": 12,
|
| 171 |
+
"max_length": 20,
|
| 172 |
+
"min_dynamic_patch": 1,
|
| 173 |
+
"min_length": 0,
|
| 174 |
+
"mlp_checkpoint": true,
|
| 175 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 176 |
+
"model_type": "Eagle2_1BVLA",
|
| 177 |
+
"modeling": "denoising",
|
| 178 |
+
"no_repeat_ngram_size": 0,
|
| 179 |
+
"normalization": "quantile",
|
| 180 |
+
"num_beam_groups": 1,
|
| 181 |
+
"num_beams": 1,
|
| 182 |
+
"num_readouts": 1,
|
| 183 |
+
"num_return_sequences": 1,
|
| 184 |
+
"output_attentions": false,
|
| 185 |
+
"output_hidden_states": false,
|
| 186 |
+
"output_scores": false,
|
| 187 |
+
"pad2square": false,
|
| 188 |
+
"pad_token_id": null,
|
| 189 |
+
"pre_feature_reduction": false,
|
| 190 |
+
"prefix": null,
|
| 191 |
+
"problem_type": null,
|
| 192 |
+
"pruned_heads": {},
|
| 193 |
+
"ps_version": "v2",
|
| 194 |
+
"readout_token_as_eos": false,
|
| 195 |
+
"remove_invalid_values": false,
|
| 196 |
+
"repetition_penalty": 1.0,
|
| 197 |
+
"return_dict": true,
|
| 198 |
+
"return_dict_in_generate": false,
|
| 199 |
+
"return_text": null,
|
| 200 |
+
"select_layer": -1,
|
| 201 |
+
"sep_token_id": null,
|
| 202 |
+
"state_dim": 10,
|
| 203 |
+
"stopping_token": "|",
|
| 204 |
+
"suppress_tokens": null,
|
| 205 |
+
"task_specific_params": null,
|
| 206 |
+
"temperature": 1.0,
|
| 207 |
+
"template": "qwen2-chat",
|
| 208 |
+
"test_denoising_steps": 10,
|
| 209 |
+
"tf_legacy_loss": false,
|
| 210 |
+
"tie_encoder_decoder": false,
|
| 211 |
+
"tie_word_embeddings": true,
|
| 212 |
+
"tokenizer_class": null,
|
| 213 |
+
"top_k": 50,
|
| 214 |
+
"top_p": 1.0,
|
| 215 |
+
"torch_dtype": "bfloat16",
|
| 216 |
+
"torchscript": false,
|
| 217 |
+
"train_denoising_steps": 100,
|
| 218 |
+
"typical_p": 1.0,
|
| 219 |
+
"use_backbone_lora": 0,
|
| 220 |
+
"use_bfloat16": false,
|
| 221 |
+
"use_llm_lora": 0,
|
| 222 |
+
"use_thumbnail": true,
|
| 223 |
+
"vision_config": {
|
| 224 |
+
"_attn_implementation_autoset": true,
|
| 225 |
+
"_name_or_path": "",
|
| 226 |
+
"add_cross_attention": false,
|
| 227 |
+
"architectures": [
|
| 228 |
+
"SiglipVisionModel"
|
| 229 |
+
],
|
| 230 |
+
"attention_dropout": 0.0,
|
| 231 |
+
"auto_map": {
|
| 232 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 233 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 234 |
+
},
|
| 235 |
+
"bad_words_ids": null,
|
| 236 |
+
"begin_suppress_tokens": null,
|
| 237 |
+
"bos_token_id": null,
|
| 238 |
+
"chunk_size_feed_forward": 0,
|
| 239 |
+
"cross_attention_hidden_size": null,
|
| 240 |
+
"decoder_start_token_id": null,
|
| 241 |
+
"diversity_penalty": 0.0,
|
| 242 |
+
"do_sample": false,
|
| 243 |
+
"drop_path_rate": 0.1,
|
| 244 |
+
"early_stopping": false,
|
| 245 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 246 |
+
"eos_token_id": null,
|
| 247 |
+
"exponential_decay_length_penalty": null,
|
| 248 |
+
"finetuning_task": null,
|
| 249 |
+
"forced_bos_token_id": null,
|
| 250 |
+
"forced_eos_token_id": null,
|
| 251 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 252 |
+
"hidden_size": 1152,
|
| 253 |
+
"id2label": {
|
| 254 |
+
"0": "LABEL_0",
|
| 255 |
+
"1": "LABEL_1"
|
| 256 |
+
},
|
| 257 |
+
"image_size": 448,
|
| 258 |
+
"intermediate_size": 4304,
|
| 259 |
+
"is_decoder": false,
|
| 260 |
+
"is_encoder_decoder": false,
|
| 261 |
+
"label2id": {
|
| 262 |
+
"LABEL_0": 0,
|
| 263 |
+
"LABEL_1": 1
|
| 264 |
+
},
|
| 265 |
+
"layer_norm_eps": 1e-06,
|
| 266 |
+
"length_penalty": 1.0,
|
| 267 |
+
"max_length": 20,
|
| 268 |
+
"min_length": 0,
|
| 269 |
+
"model_type": "siglip_vision_model",
|
| 270 |
+
"no_repeat_ngram_size": 0,
|
| 271 |
+
"num_attention_heads": 16,
|
| 272 |
+
"num_beam_groups": 1,
|
| 273 |
+
"num_beams": 1,
|
| 274 |
+
"num_channels": 3,
|
| 275 |
+
"num_hidden_layers": 27,
|
| 276 |
+
"num_image_tokens": 1024,
|
| 277 |
+
"num_return_sequences": 1,
|
| 278 |
+
"output_attentions": false,
|
| 279 |
+
"output_hidden_states": false,
|
| 280 |
+
"output_scores": false,
|
| 281 |
+
"pad_token_id": null,
|
| 282 |
+
"patch_size": 14,
|
| 283 |
+
"prefix": null,
|
| 284 |
+
"problem_type": null,
|
| 285 |
+
"projection_dim": 2048,
|
| 286 |
+
"projector_hidden_act": "gelu_fast",
|
| 287 |
+
"pruned_heads": {},
|
| 288 |
+
"remove_invalid_values": false,
|
| 289 |
+
"repetition_penalty": 1.0,
|
| 290 |
+
"return_dict": true,
|
| 291 |
+
"return_dict_in_generate": false,
|
| 292 |
+
"sep_token_id": null,
|
| 293 |
+
"suppress_tokens": null,
|
| 294 |
+
"task_specific_params": null,
|
| 295 |
+
"temperature": 1.0,
|
| 296 |
+
"tf_legacy_loss": false,
|
| 297 |
+
"tie_encoder_decoder": false,
|
| 298 |
+
"tie_word_embeddings": true,
|
| 299 |
+
"tokenizer_class": null,
|
| 300 |
+
"top_k": 50,
|
| 301 |
+
"top_p": 1.0,
|
| 302 |
+
"torch_dtype": "bfloat16",
|
| 303 |
+
"torchscript": false,
|
| 304 |
+
"transformers_version": "4.50.0.dev0",
|
| 305 |
+
"typical_p": 1.0,
|
| 306 |
+
"use_bfloat16": false,
|
| 307 |
+
"vision_use_head": false
|
| 308 |
+
},
|
| 309 |
+
"vocab_size": 151674,
|
| 310 |
+
"vocab_start": null
|
| 311 |
+
},
|
| 312 |
+
"singlevla_config_path": "jellyho/TwinVLA",
|
| 313 |
+
"singlevla_pretrained_path": null,
|
| 314 |
+
"state_dim": 10,
|
| 315 |
+
"torch_dtype": "bfloat16",
|
| 316 |
+
"transformers_version": "4.50.0.dev0"
|
| 317 |
+
}
|
2e-5/twinvla-aloha_shoes_table/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_shoes_table_new": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.32606810331344604,
|
| 6 |
+
-0.03153972327709198,
|
| 7 |
+
0.17551687359809875,
|
| 8 |
+
0.3416697382926941,
|
| 9 |
+
0.14959800243377686,
|
| 10 |
+
-0.7719306349754333,
|
| 11 |
+
-0.7661699652671814,
|
| 12 |
+
0.30325645208358765,
|
| 13 |
+
-0.1495625525712967,
|
| 14 |
+
0.11935558915138245,
|
| 15 |
+
0.3463062345981598,
|
| 16 |
+
0.03736328333616257,
|
| 17 |
+
0.1803482174873352,
|
| 18 |
+
0.3275619447231293,
|
| 19 |
+
0.1299152374267578,
|
| 20 |
+
-0.8319970369338989,
|
| 21 |
+
-0.7951449155807495,
|
| 22 |
+
0.255355566740036,
|
| 23 |
+
-0.18214373290538788,
|
| 24 |
+
0.19820308685302734
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.06460446119308472,
|
| 28 |
+
0.09588943421840668,
|
| 29 |
+
0.10945848375558853,
|
| 30 |
+
0.3783090114593506,
|
| 31 |
+
0.1452838033437729,
|
| 32 |
+
0.3174605965614319,
|
| 33 |
+
0.3114127814769745,
|
| 34 |
+
0.41768184304237366,
|
| 35 |
+
0.16497282683849335,
|
| 36 |
+
0.9928211569786072,
|
| 37 |
+
0.07045891135931015,
|
| 38 |
+
0.1014346107840538,
|
| 39 |
+
0.11245167255401611,
|
| 40 |
+
0.32256755232810974,
|
| 41 |
+
0.15707549452781677,
|
| 42 |
+
0.2342674434185028,
|
| 43 |
+
0.26082107424736023,
|
| 44 |
+
0.4003267288208008,
|
| 45 |
+
0.20266719162464142,
|
| 46 |
+
0.980128288269043
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.48015326261520386,
|
| 50 |
+
0.179313525557518,
|
| 51 |
+
0.3523038923740387,
|
| 52 |
+
0.9999800324440002,
|
| 53 |
+
0.7157489657402039,
|
| 54 |
+
0.11180483549833298,
|
| 55 |
+
0.1242646798491478,
|
| 56 |
+
0.9998366832733154,
|
| 57 |
+
0.285250186920166,
|
| 58 |
+
1.0,
|
| 59 |
+
0.49179938435554504,
|
| 60 |
+
0.3557826578617096,
|
| 61 |
+
0.42447179555892944,
|
| 62 |
+
0.9987993836402893,
|
| 63 |
+
0.5477575659751892,
|
| 64 |
+
0.05208699405193329,
|
| 65 |
+
0.034653306007385254,
|
| 66 |
+
0.9937106966972351,
|
| 67 |
+
0.3852289915084839,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.12527647614479065,
|
| 72 |
+
-0.31394532322883606,
|
| 73 |
+
-0.04988693445920944,
|
| 74 |
+
-0.2561202347278595,
|
| 75 |
+
-0.30035507678985596,
|
| 76 |
+
-0.999981164932251,
|
| 77 |
+
-0.9999915957450867,
|
| 78 |
+
-0.644327700138092,
|
| 79 |
+
-0.7897446751594543,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.17444398999214172,
|
| 82 |
+
-0.23238857090473175,
|
| 83 |
+
-0.0659869983792305,
|
| 84 |
+
-0.35028380155563354,
|
| 85 |
+
-0.3673132658004761,
|
| 86 |
+
-0.9999988079071045,
|
| 87 |
+
-0.9999988675117493,
|
| 88 |
+
-0.7761710286140442,
|
| 89 |
+
-0.9717934131622314,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.1924597442150116,
|
| 94 |
+
-0.23709256052970887,
|
| 95 |
+
-0.031008305028080944,
|
| 96 |
+
-0.15678457915782928,
|
| 97 |
+
-0.1863800033926964,
|
| 98 |
+
-0.9994285225868225,
|
| 99 |
+
-0.9997011423110962,
|
| 100 |
+
-0.5719999492168426,
|
| 101 |
+
-0.6091587543487549,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.20810787677764891,
|
| 104 |
+
-0.16556282192468644,
|
| 105 |
+
-0.017654908634722234,
|
| 106 |
+
-0.15096036493778228,
|
| 107 |
+
-0.22608168572187423,
|
| 108 |
+
-0.998928040266037,
|
| 109 |
+
-0.9990629017353058,
|
| 110 |
+
-0.5276546537876129,
|
| 111 |
+
-0.7234344184398651,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.457787150144577,
|
| 116 |
+
0.1451936572790145,
|
| 117 |
+
0.34190638065338136,
|
| 118 |
+
0.9928598761558532,
|
| 119 |
+
0.5035569965839384,
|
| 120 |
+
-0.05978704243898392,
|
| 121 |
+
-0.01781813446432354,
|
| 122 |
+
0.9893046200275422,
|
| 123 |
+
0.17466527372598611,
|
| 124 |
+
1.0,
|
| 125 |
+
0.4768679320812225,
|
| 126 |
+
0.2598331540822982,
|
| 127 |
+
0.39134971201419827,
|
| 128 |
+
0.9723170220851898,
|
| 129 |
+
0.47033962905406923,
|
| 130 |
+
-0.1864572197198868,
|
| 131 |
+
-0.060312222316861244,
|
| 132 |
+
0.9807472229003906,
|
| 133 |
+
0.21941211968660337,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.3233906924724579,
|
| 162 |
+
-0.032134901732206345,
|
| 163 |
+
0.16396647691726685,
|
| 164 |
+
0.3156682550907135,
|
| 165 |
+
0.1496039479970932,
|
| 166 |
+
-0.7826839685440063,
|
| 167 |
+
-0.769219696521759,
|
| 168 |
+
0.3018459677696228,
|
| 169 |
+
-0.12293437123298645,
|
| 170 |
+
0.6964512467384338,
|
| 171 |
+
0.34235188364982605,
|
| 172 |
+
0.03810477629303932,
|
| 173 |
+
0.16778846085071564,
|
| 174 |
+
0.2959897220134735,
|
| 175 |
+
0.12479893863201141,
|
| 176 |
+
-0.8384960293769836,
|
| 177 |
+
-0.7965015172958374,
|
| 178 |
+
0.2529186010360718,
|
| 179 |
+
-0.1519845426082611,
|
| 180 |
+
0.7593687772750854
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.062407124787569046,
|
| 184 |
+
0.09441278874874115,
|
| 185 |
+
0.10514318943023682,
|
| 186 |
+
0.38240039348602295,
|
| 187 |
+
0.14718197286128998,
|
| 188 |
+
0.31222963333129883,
|
| 189 |
+
0.31915774941444397,
|
| 190 |
+
0.41190215945243835,
|
| 191 |
+
0.1747966706752777,
|
| 192 |
+
0.3845251202583313,
|
| 193 |
+
0.06875818967819214,
|
| 194 |
+
0.09827672690153122,
|
| 195 |
+
0.10889741778373718,
|
| 196 |
+
0.3346223533153534,
|
| 197 |
+
0.15245452523231506,
|
| 198 |
+
0.241925910115242,
|
| 199 |
+
0.2739580273628235,
|
| 200 |
+
0.4007769823074341,
|
| 201 |
+
0.20697632431983948,
|
| 202 |
+
0.30625119805336
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.4779528081417084,
|
| 206 |
+
0.17402252554893494,
|
| 207 |
+
0.412266343832016,
|
| 208 |
+
0.9999450445175171,
|
| 209 |
+
0.6999034285545349,
|
| 210 |
+
0.4170636534690857,
|
| 211 |
+
0.4215781092643738,
|
| 212 |
+
1.0,
|
| 213 |
+
0.932714581489563,
|
| 214 |
+
1.3076640367507935,
|
| 215 |
+
0.48697036504745483,
|
| 216 |
+
0.34565815329551697,
|
| 217 |
+
0.415988564491272,
|
| 218 |
+
0.9998390078544617,
|
| 219 |
+
0.5593472123146057,
|
| 220 |
+
0.20724913477897644,
|
| 221 |
+
0.26142606139183044,
|
| 222 |
+
1.0,
|
| 223 |
+
0.5777683854103088,
|
| 224 |
+
1.3103067874908447
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.12448469549417496,
|
| 228 |
+
-0.30521926283836365,
|
| 229 |
+
-0.004976626019924879,
|
| 230 |
+
-0.21920020878314972,
|
| 231 |
+
-0.5096501708030701,
|
| 232 |
+
-1.0,
|
| 233 |
+
-0.9999982118606567,
|
| 234 |
+
-0.6262368559837341,
|
| 235 |
+
-0.7456304430961609,
|
| 236 |
+
-1.1091713905334473,
|
| 237 |
+
0.1249726265668869,
|
| 238 |
+
-0.22351478040218353,
|
| 239 |
+
-0.006724653299897909,
|
| 240 |
+
-0.36625856161117554,
|
| 241 |
+
-0.4249938726425171,
|
| 242 |
+
-0.9999956488609314,
|
| 243 |
+
-0.999992847442627,
|
| 244 |
+
-0.77183997631073,
|
| 245 |
+
-0.9583328366279602,
|
| 246 |
+
-1.04777991771698
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.19817760735750198,
|
| 250 |
+
-0.2323527842760086,
|
| 251 |
+
-0.004393648169934749,
|
| 252 |
+
-0.14680973589420318,
|
| 253 |
+
-0.1899831309914589,
|
| 254 |
+
-0.9998269140720367,
|
| 255 |
+
-0.9998353064060211,
|
| 256 |
+
-0.49930458664894106,
|
| 257 |
+
-0.5963611721992492,
|
| 258 |
+
-1.0807034492492675,
|
| 259 |
+
0.20821888744831085,
|
| 260 |
+
-0.13953636586666107,
|
| 261 |
+
-0.0033576888265088203,
|
| 262 |
+
-0.1788107320666313,
|
| 263 |
+
-0.22050866037607195,
|
| 264 |
+
-0.999310964345932,
|
| 265 |
+
-0.9993988335132599,
|
| 266 |
+
-0.5106797099113465,
|
| 267 |
+
-0.7302295982837677,
|
| 268 |
+
0.05842937603592872
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.44812404513359066,
|
| 272 |
+
0.14210240542888639,
|
| 273 |
+
0.337252739071846,
|
| 274 |
+
0.9943239092826842,
|
| 275 |
+
0.5118523061275481,
|
| 276 |
+
0.031205366365610953,
|
| 277 |
+
0.04714705012738701,
|
| 278 |
+
0.992770653963089,
|
| 279 |
+
0.18282963484525644,
|
| 280 |
+
1.1270769238471985,
|
| 281 |
+
0.47021201252937317,
|
| 282 |
+
0.2550090014934537,
|
| 283 |
+
0.3824465185403823,
|
| 284 |
+
0.995180070400238,
|
| 285 |
+
0.46117363572120657,
|
| 286 |
+
-0.047424964234233064,
|
| 287 |
+
-3.7679112665500725e-06,
|
| 288 |
+
0.9869830250740051,
|
| 289 |
+
0.23266565054655072,
|
| 290 |
+
1.1919615149497986
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 12911,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_shoes_table": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.32606810331344604,
|
| 322 |
+
-0.03153972327709198,
|
| 323 |
+
0.17551687359809875,
|
| 324 |
+
0.3416697382926941,
|
| 325 |
+
0.14959800243377686,
|
| 326 |
+
-0.7719306349754333,
|
| 327 |
+
-0.7661699652671814,
|
| 328 |
+
0.30325645208358765,
|
| 329 |
+
-0.1495625525712967,
|
| 330 |
+
0.11935558915138245,
|
| 331 |
+
0.3463062345981598,
|
| 332 |
+
0.03736328333616257,
|
| 333 |
+
0.1803482174873352,
|
| 334 |
+
0.3275619447231293,
|
| 335 |
+
0.1299152374267578,
|
| 336 |
+
-0.8319970369338989,
|
| 337 |
+
-0.7951449155807495,
|
| 338 |
+
0.255355566740036,
|
| 339 |
+
-0.18214373290538788,
|
| 340 |
+
0.19820308685302734
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.06460446119308472,
|
| 344 |
+
0.09588943421840668,
|
| 345 |
+
0.10945848375558853,
|
| 346 |
+
0.3783090114593506,
|
| 347 |
+
0.1452838033437729,
|
| 348 |
+
0.3174605965614319,
|
| 349 |
+
0.3114127814769745,
|
| 350 |
+
0.41768184304237366,
|
| 351 |
+
0.16497282683849335,
|
| 352 |
+
0.9928211569786072,
|
| 353 |
+
0.07045891135931015,
|
| 354 |
+
0.1014346107840538,
|
| 355 |
+
0.11245167255401611,
|
| 356 |
+
0.32256755232810974,
|
| 357 |
+
0.15707549452781677,
|
| 358 |
+
0.2342674434185028,
|
| 359 |
+
0.26082107424736023,
|
| 360 |
+
0.4003267288208008,
|
| 361 |
+
0.20266719162464142,
|
| 362 |
+
0.980128288269043
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.48015326261520386,
|
| 366 |
+
0.179313525557518,
|
| 367 |
+
0.3523038923740387,
|
| 368 |
+
0.9999800324440002,
|
| 369 |
+
0.7157489657402039,
|
| 370 |
+
0.11180483549833298,
|
| 371 |
+
0.1242646798491478,
|
| 372 |
+
0.9998366832733154,
|
| 373 |
+
0.285250186920166,
|
| 374 |
+
1.0,
|
| 375 |
+
0.49179938435554504,
|
| 376 |
+
0.3557826578617096,
|
| 377 |
+
0.42447179555892944,
|
| 378 |
+
0.9987993836402893,
|
| 379 |
+
0.5477575659751892,
|
| 380 |
+
0.05208699405193329,
|
| 381 |
+
0.034653306007385254,
|
| 382 |
+
0.9937106966972351,
|
| 383 |
+
0.3852289915084839,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.12527647614479065,
|
| 388 |
+
-0.31394532322883606,
|
| 389 |
+
-0.04988693445920944,
|
| 390 |
+
-0.2561202347278595,
|
| 391 |
+
-0.30035507678985596,
|
| 392 |
+
-0.999981164932251,
|
| 393 |
+
-0.9999915957450867,
|
| 394 |
+
-0.644327700138092,
|
| 395 |
+
-0.7897446751594543,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.17444398999214172,
|
| 398 |
+
-0.23238857090473175,
|
| 399 |
+
-0.0659869983792305,
|
| 400 |
+
-0.35028380155563354,
|
| 401 |
+
-0.3673132658004761,
|
| 402 |
+
-0.9999988079071045,
|
| 403 |
+
-0.9999988675117493,
|
| 404 |
+
-0.7761710286140442,
|
| 405 |
+
-0.9717934131622314,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.1924597442150116,
|
| 410 |
+
-0.23709256052970887,
|
| 411 |
+
-0.031008305028080944,
|
| 412 |
+
-0.15678457915782928,
|
| 413 |
+
-0.1863800033926964,
|
| 414 |
+
-0.9994285225868225,
|
| 415 |
+
-0.9997011423110962,
|
| 416 |
+
-0.5719999492168426,
|
| 417 |
+
-0.6091587543487549,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.20810787677764891,
|
| 420 |
+
-0.16556282192468644,
|
| 421 |
+
-0.017654908634722234,
|
| 422 |
+
-0.15096036493778228,
|
| 423 |
+
-0.22608168572187423,
|
| 424 |
+
-0.998928040266037,
|
| 425 |
+
-0.9990629017353058,
|
| 426 |
+
-0.5276546537876129,
|
| 427 |
+
-0.7234344184398651,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.457787150144577,
|
| 432 |
+
0.1451936572790145,
|
| 433 |
+
0.34190638065338136,
|
| 434 |
+
0.9928598761558532,
|
| 435 |
+
0.5035569965839384,
|
| 436 |
+
-0.05978704243898392,
|
| 437 |
+
-0.01781813446432354,
|
| 438 |
+
0.9893046200275422,
|
| 439 |
+
0.17466527372598611,
|
| 440 |
+
1.0,
|
| 441 |
+
0.4768679320812225,
|
| 442 |
+
0.2598331540822982,
|
| 443 |
+
0.39134971201419827,
|
| 444 |
+
0.9723170220851898,
|
| 445 |
+
0.47033962905406923,
|
| 446 |
+
-0.1864572197198868,
|
| 447 |
+
-0.060312222316861244,
|
| 448 |
+
0.9807472229003906,
|
| 449 |
+
0.21941211968660337,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.3233906924724579,
|
| 478 |
+
-0.032134901732206345,
|
| 479 |
+
0.16396647691726685,
|
| 480 |
+
0.3156682550907135,
|
| 481 |
+
0.1496039479970932,
|
| 482 |
+
-0.7826839685440063,
|
| 483 |
+
-0.769219696521759,
|
| 484 |
+
0.3018459677696228,
|
| 485 |
+
-0.12293437123298645,
|
| 486 |
+
0.6964512467384338,
|
| 487 |
+
0.34235188364982605,
|
| 488 |
+
0.03810477629303932,
|
| 489 |
+
0.16778846085071564,
|
| 490 |
+
0.2959897220134735,
|
| 491 |
+
0.12479893863201141,
|
| 492 |
+
-0.8384960293769836,
|
| 493 |
+
-0.7965015172958374,
|
| 494 |
+
0.2529186010360718,
|
| 495 |
+
-0.1519845426082611,
|
| 496 |
+
0.7593687772750854
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.062407124787569046,
|
| 500 |
+
0.09441278874874115,
|
| 501 |
+
0.10514318943023682,
|
| 502 |
+
0.38240039348602295,
|
| 503 |
+
0.14718197286128998,
|
| 504 |
+
0.31222963333129883,
|
| 505 |
+
0.31915774941444397,
|
| 506 |
+
0.41190215945243835,
|
| 507 |
+
0.1747966706752777,
|
| 508 |
+
0.3845251202583313,
|
| 509 |
+
0.06875818967819214,
|
| 510 |
+
0.09827672690153122,
|
| 511 |
+
0.10889741778373718,
|
| 512 |
+
0.3346223533153534,
|
| 513 |
+
0.15245452523231506,
|
| 514 |
+
0.241925910115242,
|
| 515 |
+
0.2739580273628235,
|
| 516 |
+
0.4007769823074341,
|
| 517 |
+
0.20697632431983948,
|
| 518 |
+
0.30625119805336
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.4779528081417084,
|
| 522 |
+
0.17402252554893494,
|
| 523 |
+
0.412266343832016,
|
| 524 |
+
0.9999450445175171,
|
| 525 |
+
0.6999034285545349,
|
| 526 |
+
0.4170636534690857,
|
| 527 |
+
0.4215781092643738,
|
| 528 |
+
1.0,
|
| 529 |
+
0.932714581489563,
|
| 530 |
+
1.3076640367507935,
|
| 531 |
+
0.48697036504745483,
|
| 532 |
+
0.34565815329551697,
|
| 533 |
+
0.415988564491272,
|
| 534 |
+
0.9998390078544617,
|
| 535 |
+
0.5593472123146057,
|
| 536 |
+
0.20724913477897644,
|
| 537 |
+
0.26142606139183044,
|
| 538 |
+
1.0,
|
| 539 |
+
0.5777683854103088,
|
| 540 |
+
1.3103067874908447
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.12448469549417496,
|
| 544 |
+
-0.30521926283836365,
|
| 545 |
+
-0.004976626019924879,
|
| 546 |
+
-0.21920020878314972,
|
| 547 |
+
-0.5096501708030701,
|
| 548 |
+
-1.0,
|
| 549 |
+
-0.9999982118606567,
|
| 550 |
+
-0.6262368559837341,
|
| 551 |
+
-0.7456304430961609,
|
| 552 |
+
-1.1091713905334473,
|
| 553 |
+
0.1249726265668869,
|
| 554 |
+
-0.22351478040218353,
|
| 555 |
+
-0.006724653299897909,
|
| 556 |
+
-0.36625856161117554,
|
| 557 |
+
-0.4249938726425171,
|
| 558 |
+
-0.9999956488609314,
|
| 559 |
+
-0.999992847442627,
|
| 560 |
+
-0.77183997631073,
|
| 561 |
+
-0.9583328366279602,
|
| 562 |
+
-1.04777991771698
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.19817760735750198,
|
| 566 |
+
-0.2323527842760086,
|
| 567 |
+
-0.004393648169934749,
|
| 568 |
+
-0.14680973589420318,
|
| 569 |
+
-0.1899831309914589,
|
| 570 |
+
-0.9998269140720367,
|
| 571 |
+
-0.9998353064060211,
|
| 572 |
+
-0.49930458664894106,
|
| 573 |
+
-0.5963611721992492,
|
| 574 |
+
-1.0807034492492675,
|
| 575 |
+
0.20821888744831085,
|
| 576 |
+
-0.13953636586666107,
|
| 577 |
+
-0.0033576888265088203,
|
| 578 |
+
-0.1788107320666313,
|
| 579 |
+
-0.22050866037607195,
|
| 580 |
+
-0.999310964345932,
|
| 581 |
+
-0.9993988335132599,
|
| 582 |
+
-0.5106797099113465,
|
| 583 |
+
-0.7302295982837677,
|
| 584 |
+
0.05842937603592872
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.44812404513359066,
|
| 588 |
+
0.14210240542888639,
|
| 589 |
+
0.337252739071846,
|
| 590 |
+
0.9943239092826842,
|
| 591 |
+
0.5118523061275481,
|
| 592 |
+
0.031205366365610953,
|
| 593 |
+
0.04714705012738701,
|
| 594 |
+
0.992770653963089,
|
| 595 |
+
0.18282963484525644,
|
| 596 |
+
1.1270769238471985,
|
| 597 |
+
0.47021201252937317,
|
| 598 |
+
0.2550090014934537,
|
| 599 |
+
0.3824465185403823,
|
| 600 |
+
0.995180070400238,
|
| 601 |
+
0.46117363572120657,
|
| 602 |
+
-0.047424964234233064,
|
| 603 |
+
-3.7679112665500725e-06,
|
| 604 |
+
0.9869830250740051,
|
| 605 |
+
0.23266565054655072,
|
| 606 |
+
1.1919615149497986
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 12911,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
2e-5/twinvla-aloha_shoes_table/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a57f9a891e27b6f51bc90a4252de9c8ac6dbdd21c49456fd501405bfdd1589fa
|
| 3 |
+
size 2889539864
|
2e-5/twinvla-aloha_shoes_table/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": "428b4d21376ff21d70b8b8830db6f6ab3907bfd8",
|
| 3 |
+
"_name_or_path": "jellyho/TwinVLA",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "false",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"dataset_statistics_path": null,
|
| 14 |
+
"denoiser": "FM",
|
| 15 |
+
"diffusion_batch": 32,
|
| 16 |
+
"dit_size": "DiT-B",
|
| 17 |
+
"downsample_ratio": 0.5,
|
| 18 |
+
"dynamic_image_size": true,
|
| 19 |
+
"efficient_loss": true,
|
| 20 |
+
"enable_cfg": true,
|
| 21 |
+
"force_image_size": 448,
|
| 22 |
+
"global_normalization": true,
|
| 23 |
+
"hz_interpolate": 20,
|
| 24 |
+
"image_size": 224,
|
| 25 |
+
"interpolate_gripper": false,
|
| 26 |
+
"keep_aspect_ratio": false,
|
| 27 |
+
"knowledge_insulation": false,
|
| 28 |
+
"llm_config": {
|
| 29 |
+
"_attn_implementation_autoset": true,
|
| 30 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 31 |
+
"add_cross_attention": false,
|
| 32 |
+
"architectures": [
|
| 33 |
+
"Qwen2ForCausalLM"
|
| 34 |
+
],
|
| 35 |
+
"attention_dropout": 0.0,
|
| 36 |
+
"auto_map": {
|
| 37 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 38 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 39 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 40 |
+
},
|
| 41 |
+
"bad_words_ids": null,
|
| 42 |
+
"begin_suppress_tokens": null,
|
| 43 |
+
"bos_token_id": 151643,
|
| 44 |
+
"chunk_size_feed_forward": 0,
|
| 45 |
+
"cross_attention_hidden_size": null,
|
| 46 |
+
"decoder_start_token_id": null,
|
| 47 |
+
"diversity_penalty": 0.0,
|
| 48 |
+
"do_sample": false,
|
| 49 |
+
"early_stopping": false,
|
| 50 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 51 |
+
"eos_token_id": 151645,
|
| 52 |
+
"exponential_decay_length_penalty": null,
|
| 53 |
+
"finetuning_task": null,
|
| 54 |
+
"forced_bos_token_id": null,
|
| 55 |
+
"forced_eos_token_id": null,
|
| 56 |
+
"hidden_act": "silu",
|
| 57 |
+
"hidden_size": 896,
|
| 58 |
+
"id2label": {
|
| 59 |
+
"0": "LABEL_0",
|
| 60 |
+
"1": "LABEL_1"
|
| 61 |
+
},
|
| 62 |
+
"initializer_range": 0.02,
|
| 63 |
+
"intermediate_size": 4864,
|
| 64 |
+
"is_decoder": false,
|
| 65 |
+
"is_encoder_decoder": false,
|
| 66 |
+
"label2id": {
|
| 67 |
+
"LABEL_0": 0,
|
| 68 |
+
"LABEL_1": 1
|
| 69 |
+
},
|
| 70 |
+
"length_penalty": 1.0,
|
| 71 |
+
"max_length": 20,
|
| 72 |
+
"max_position_embeddings": 32768,
|
| 73 |
+
"max_window_layers": 21,
|
| 74 |
+
"min_length": 0,
|
| 75 |
+
"model_type": "qwen2",
|
| 76 |
+
"no_repeat_ngram_size": 0,
|
| 77 |
+
"num_attention_heads": 14,
|
| 78 |
+
"num_beam_groups": 1,
|
| 79 |
+
"num_beams": 1,
|
| 80 |
+
"num_hidden_layers": 24,
|
| 81 |
+
"num_key_value_heads": 2,
|
| 82 |
+
"num_return_sequences": 1,
|
| 83 |
+
"output_attentions": false,
|
| 84 |
+
"output_hidden_states": false,
|
| 85 |
+
"output_scores": false,
|
| 86 |
+
"pad_token_id": null,
|
| 87 |
+
"prefix": null,
|
| 88 |
+
"problem_type": null,
|
| 89 |
+
"pruned_heads": {},
|
| 90 |
+
"remove_invalid_values": false,
|
| 91 |
+
"repetition_penalty": 1.0,
|
| 92 |
+
"return_dict": true,
|
| 93 |
+
"return_dict_in_generate": false,
|
| 94 |
+
"rms_norm_eps": 1e-06,
|
| 95 |
+
"rope_scaling": null,
|
| 96 |
+
"rope_theta": 1000000.0,
|
| 97 |
+
"sep_token_id": null,
|
| 98 |
+
"sliding_window": 32768,
|
| 99 |
+
"suppress_tokens": null,
|
| 100 |
+
"task_specific_params": null,
|
| 101 |
+
"temperature": 1.0,
|
| 102 |
+
"tf_legacy_loss": false,
|
| 103 |
+
"tie_encoder_decoder": false,
|
| 104 |
+
"tie_word_embeddings": true,
|
| 105 |
+
"tokenizer_class": null,
|
| 106 |
+
"top_k": 50,
|
| 107 |
+
"top_p": 1.0,
|
| 108 |
+
"torch_dtype": "bfloat16",
|
| 109 |
+
"torchscript": false,
|
| 110 |
+
"transformers_version": "4.50.0.dev0",
|
| 111 |
+
"typical_p": 1.0,
|
| 112 |
+
"use_bfloat16": false,
|
| 113 |
+
"use_cache": false,
|
| 114 |
+
"use_sliding_window": false,
|
| 115 |
+
"vocab_size": 151674
|
| 116 |
+
},
|
| 117 |
+
"loss_version": "v4",
|
| 118 |
+
"max_dynamic_patch": 12,
|
| 119 |
+
"min_dynamic_patch": 1,
|
| 120 |
+
"mlp_checkpoint": true,
|
| 121 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 122 |
+
"model_type": "Eagle2_1BVLA",
|
| 123 |
+
"modeling": "denoising",
|
| 124 |
+
"normalization": "quantile",
|
| 125 |
+
"num_readouts": 1,
|
| 126 |
+
"pad2square": false,
|
| 127 |
+
"pre_feature_reduction": false,
|
| 128 |
+
"ps_version": "v2",
|
| 129 |
+
"readout_token_as_eos": false,
|
| 130 |
+
"return_text": null,
|
| 131 |
+
"select_layer": -1,
|
| 132 |
+
"state_dim": 10,
|
| 133 |
+
"stopping_token": "|",
|
| 134 |
+
"template": "qwen2-chat",
|
| 135 |
+
"test_denoising_steps": 10,
|
| 136 |
+
"torch_dtype": "bfloat16",
|
| 137 |
+
"train_denoising_steps": 100,
|
| 138 |
+
"transformers_version": null,
|
| 139 |
+
"use_backbone_lora": 0,
|
| 140 |
+
"use_llm_lora": 0,
|
| 141 |
+
"use_thumbnail": true,
|
| 142 |
+
"vision_config": {
|
| 143 |
+
"_attn_implementation_autoset": true,
|
| 144 |
+
"_name_or_path": "",
|
| 145 |
+
"add_cross_attention": false,
|
| 146 |
+
"architectures": [
|
| 147 |
+
"SiglipVisionModel"
|
| 148 |
+
],
|
| 149 |
+
"attention_dropout": 0.0,
|
| 150 |
+
"auto_map": {
|
| 151 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 152 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 153 |
+
},
|
| 154 |
+
"bad_words_ids": null,
|
| 155 |
+
"begin_suppress_tokens": null,
|
| 156 |
+
"bos_token_id": null,
|
| 157 |
+
"chunk_size_feed_forward": 0,
|
| 158 |
+
"cross_attention_hidden_size": null,
|
| 159 |
+
"decoder_start_token_id": null,
|
| 160 |
+
"diversity_penalty": 0.0,
|
| 161 |
+
"do_sample": false,
|
| 162 |
+
"drop_path_rate": 0.1,
|
| 163 |
+
"early_stopping": false,
|
| 164 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 165 |
+
"eos_token_id": null,
|
| 166 |
+
"exponential_decay_length_penalty": null,
|
| 167 |
+
"finetuning_task": null,
|
| 168 |
+
"forced_bos_token_id": null,
|
| 169 |
+
"forced_eos_token_id": null,
|
| 170 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 171 |
+
"hidden_size": 1152,
|
| 172 |
+
"id2label": {
|
| 173 |
+
"0": "LABEL_0",
|
| 174 |
+
"1": "LABEL_1"
|
| 175 |
+
},
|
| 176 |
+
"image_size": 448,
|
| 177 |
+
"intermediate_size": 4304,
|
| 178 |
+
"is_decoder": false,
|
| 179 |
+
"is_encoder_decoder": false,
|
| 180 |
+
"label2id": {
|
| 181 |
+
"LABEL_0": 0,
|
| 182 |
+
"LABEL_1": 1
|
| 183 |
+
},
|
| 184 |
+
"layer_norm_eps": 1e-06,
|
| 185 |
+
"length_penalty": 1.0,
|
| 186 |
+
"max_length": 20,
|
| 187 |
+
"min_length": 0,
|
| 188 |
+
"model_type": "siglip_vision_model",
|
| 189 |
+
"no_repeat_ngram_size": 0,
|
| 190 |
+
"num_attention_heads": 16,
|
| 191 |
+
"num_beam_groups": 1,
|
| 192 |
+
"num_beams": 1,
|
| 193 |
+
"num_channels": 3,
|
| 194 |
+
"num_hidden_layers": 27,
|
| 195 |
+
"num_image_tokens": 1024,
|
| 196 |
+
"num_return_sequences": 1,
|
| 197 |
+
"output_attentions": false,
|
| 198 |
+
"output_hidden_states": false,
|
| 199 |
+
"output_scores": false,
|
| 200 |
+
"pad_token_id": null,
|
| 201 |
+
"patch_size": 14,
|
| 202 |
+
"prefix": null,
|
| 203 |
+
"problem_type": null,
|
| 204 |
+
"projection_dim": 2048,
|
| 205 |
+
"projector_hidden_act": "gelu_fast",
|
| 206 |
+
"pruned_heads": {},
|
| 207 |
+
"remove_invalid_values": false,
|
| 208 |
+
"repetition_penalty": 1.0,
|
| 209 |
+
"return_dict": true,
|
| 210 |
+
"return_dict_in_generate": false,
|
| 211 |
+
"sep_token_id": null,
|
| 212 |
+
"suppress_tokens": null,
|
| 213 |
+
"task_specific_params": null,
|
| 214 |
+
"temperature": 1.0,
|
| 215 |
+
"tf_legacy_loss": false,
|
| 216 |
+
"tie_encoder_decoder": false,
|
| 217 |
+
"tie_word_embeddings": true,
|
| 218 |
+
"tokenizer_class": null,
|
| 219 |
+
"top_k": 50,
|
| 220 |
+
"top_p": 1.0,
|
| 221 |
+
"torch_dtype": "bfloat16",
|
| 222 |
+
"torchscript": false,
|
| 223 |
+
"transformers_version": "4.50.0.dev0",
|
| 224 |
+
"typical_p": 1.0,
|
| 225 |
+
"use_bfloat16": false,
|
| 226 |
+
"vision_use_head": false
|
| 227 |
+
},
|
| 228 |
+
"vocab_size": 151674,
|
| 229 |
+
"vocab_start": null
|
| 230 |
+
}
|
2e-5/twinvla-scratch-aloha_dish_drainer/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
2e-5/twinvla-scratch-aloha_dish_drainer/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_dish_drainer_new": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.40166160464286804,
|
| 6 |
+
-0.011121422052383423,
|
| 7 |
+
0.18381044268608093,
|
| 8 |
+
0.7193055748939514,
|
| 9 |
+
0.25373342633247375,
|
| 10 |
+
-0.5631368160247803,
|
| 11 |
+
-0.1440804898738861,
|
| 12 |
+
0.8170215487480164,
|
| 13 |
+
0.1759030520915985,
|
| 14 |
+
0.3052484393119812,
|
| 15 |
+
0.27496522665023804,
|
| 16 |
+
0.07536163926124573,
|
| 17 |
+
0.11210401356220245,
|
| 18 |
+
0.5866131782531738,
|
| 19 |
+
0.166164368391037,
|
| 20 |
+
-0.6815540790557861,
|
| 21 |
+
-0.029566073790192604,
|
| 22 |
+
0.9651414752006531,
|
| 23 |
+
0.16927561163902283,
|
| 24 |
+
-0.015535339713096619
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.11050388216972351,
|
| 28 |
+
0.09560802578926086,
|
| 29 |
+
0.07149423658847809,
|
| 30 |
+
0.16429585218429565,
|
| 31 |
+
0.23607663810253143,
|
| 32 |
+
0.13553042709827423,
|
| 33 |
+
0.4136315882205963,
|
| 34 |
+
0.16760703921318054,
|
| 35 |
+
0.28564009070396423,
|
| 36 |
+
0.9522888660430908,
|
| 37 |
+
0.031309936195611954,
|
| 38 |
+
0.04574710130691528,
|
| 39 |
+
0.0856705829501152,
|
| 40 |
+
0.29802361130714417,
|
| 41 |
+
0.15602004528045654,
|
| 42 |
+
0.22492949664592743,
|
| 43 |
+
0.10802315920591354,
|
| 44 |
+
0.041615039110183716,
|
| 45 |
+
0.15993009507656097,
|
| 46 |
+
0.9998806715011597
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.6568294763565063,
|
| 50 |
+
0.20922525227069855,
|
| 51 |
+
0.329291433095932,
|
| 52 |
+
0.9988790154457092,
|
| 53 |
+
0.8221861720085144,
|
| 54 |
+
-0.02126980759203434,
|
| 55 |
+
0.554952085018158,
|
| 56 |
+
0.9999961256980896,
|
| 57 |
+
0.8352594971656799,
|
| 58 |
+
1.0,
|
| 59 |
+
0.3725535273551941,
|
| 60 |
+
0.20133008062839508,
|
| 61 |
+
0.2683204710483551,
|
| 62 |
+
0.9969081878662109,
|
| 63 |
+
0.5947288274765015,
|
| 64 |
+
0.135818213224411,
|
| 65 |
+
0.297533243894577,
|
| 66 |
+
0.9999833106994629,
|
| 67 |
+
0.6284497380256653,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1679762601852417,
|
| 72 |
+
-0.2037276178598404,
|
| 73 |
+
0.026118876412510872,
|
| 74 |
+
0.06734701991081238,
|
| 75 |
+
-0.3303077816963196,
|
| 76 |
+
-0.865761399269104,
|
| 77 |
+
-0.9697803854942322,
|
| 78 |
+
0.24385260045528412,
|
| 79 |
+
-0.3337814211845398,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.17690593004226685,
|
| 82 |
+
-0.019342761486768723,
|
| 83 |
+
-0.045900676399469376,
|
| 84 |
+
-0.08388058096170425,
|
| 85 |
+
-0.1825810670852661,
|
| 86 |
+
-0.9999706149101257,
|
| 87 |
+
-0.4282298684120178,
|
| 88 |
+
0.7756603956222534,
|
| 89 |
+
-0.19046637415885925,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.22041156470775605,
|
| 94 |
+
-0.17958899974822998,
|
| 95 |
+
0.04473079532384872,
|
| 96 |
+
0.2284090793132782,
|
| 97 |
+
-0.2088965356349945,
|
| 98 |
+
-0.811203727722168,
|
| 99 |
+
-0.9306126594543457,
|
| 100 |
+
0.3530711317062378,
|
| 101 |
+
-0.2207678198814392,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.2055837804079056,
|
| 104 |
+
0.005079864375293255,
|
| 105 |
+
-0.04285515695810318,
|
| 106 |
+
0.023393160849809646,
|
| 107 |
+
-0.12909780085086822,
|
| 108 |
+
-0.9969730639457702,
|
| 109 |
+
-0.31871861577033994,
|
| 110 |
+
0.8128526282310485,
|
| 111 |
+
-0.12555764615535736,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.6256548881530761,
|
| 116 |
+
0.16506216526031484,
|
| 117 |
+
0.3053938007354736,
|
| 118 |
+
0.9577780866622918,
|
| 119 |
+
0.7322160029411315,
|
| 120 |
+
-0.22335838973522296,
|
| 121 |
+
0.43161325573921194,
|
| 122 |
+
0.9983374524116516,
|
| 123 |
+
0.7683744573593138,
|
| 124 |
+
1.0,
|
| 125 |
+
0.344862767457962,
|
| 126 |
+
0.19341405749320983,
|
| 127 |
+
0.24164194464683514,
|
| 128 |
+
0.9684402346611023,
|
| 129 |
+
0.5674381494522094,
|
| 130 |
+
-0.24195577383041442,
|
| 131 |
+
0.2379095745086669,
|
| 132 |
+
0.9997554516792297,
|
| 133 |
+
0.564831252098083,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.3995276093482971,
|
| 162 |
+
-0.01743783988058567,
|
| 163 |
+
0.17103439569473267,
|
| 164 |
+
0.7105359435081482,
|
| 165 |
+
0.238600954413414,
|
| 166 |
+
-0.5759879946708679,
|
| 167 |
+
-0.11663730442523956,
|
| 168 |
+
0.8159795999526978,
|
| 169 |
+
0.18427881598472595,
|
| 170 |
+
0.32329148054122925,
|
| 171 |
+
0.26429733633995056,
|
| 172 |
+
0.056828975677490234,
|
| 173 |
+
0.10836688429117203,
|
| 174 |
+
0.5435153841972351,
|
| 175 |
+
0.10053253918886185,
|
| 176 |
+
-0.7011978030204773,
|
| 177 |
+
-0.03383757919073105,
|
| 178 |
+
0.9509052038192749,
|
| 179 |
+
0.0682743564248085,
|
| 180 |
+
-0.11205186694860458
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.10546877980232239,
|
| 184 |
+
0.09407489001750946,
|
| 185 |
+
0.07594858855009079,
|
| 186 |
+
0.15545178949832916,
|
| 187 |
+
0.23370550572872162,
|
| 188 |
+
0.1663108617067337,
|
| 189 |
+
0.417312353849411,
|
| 190 |
+
0.1589633673429489,
|
| 191 |
+
0.29529041051864624,
|
| 192 |
+
0.8386000394821167,
|
| 193 |
+
0.03193666785955429,
|
| 194 |
+
0.03702627867460251,
|
| 195 |
+
0.08499232679605484,
|
| 196 |
+
0.33746665716171265,
|
| 197 |
+
0.13817641139030457,
|
| 198 |
+
0.2642515003681183,
|
| 199 |
+
0.13742685317993164,
|
| 200 |
+
0.10328594595193863,
|
| 201 |
+
0.24581077694892883,
|
| 202 |
+
0.9880411624908447
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.6216338276863098,
|
| 206 |
+
0.1681845635175705,
|
| 207 |
+
0.3582729399204254,
|
| 208 |
+
0.9998778104782104,
|
| 209 |
+
0.7569742202758789,
|
| 210 |
+
0.29317960143089294,
|
| 211 |
+
0.5474420785903931,
|
| 212 |
+
1.0,
|
| 213 |
+
0.9644882678985596,
|
| 214 |
+
1.2399240732192993,
|
| 215 |
+
0.36810019612312317,
|
| 216 |
+
0.15229015052318573,
|
| 217 |
+
0.3755773603916168,
|
| 218 |
+
0.9999530911445618,
|
| 219 |
+
0.47173869609832764,
|
| 220 |
+
0.4396477937698364,
|
| 221 |
+
0.5856077671051025,
|
| 222 |
+
1.0,
|
| 223 |
+
0.9141661524772644,
|
| 224 |
+
1.0335123538970947
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.17779576778411865,
|
| 228 |
+
-0.2223799079656601,
|
| 229 |
+
0.009585360996425152,
|
| 230 |
+
0.27525120973587036,
|
| 231 |
+
-0.3401731848716736,
|
| 232 |
+
-0.8740139603614807,
|
| 233 |
+
-0.922980010509491,
|
| 234 |
+
0.20966650545597076,
|
| 235 |
+
-0.5117865800857544,
|
| 236 |
+
-1.04777991771698,
|
| 237 |
+
0.13721425831317902,
|
| 238 |
+
-0.11607959121465683,
|
| 239 |
+
-0.006126723252236843,
|
| 240 |
+
-0.12117788940668106,
|
| 241 |
+
-0.5865428447723389,
|
| 242 |
+
-0.9999897480010986,
|
| 243 |
+
-0.48856121301651,
|
| 244 |
+
-0.09543908387422562,
|
| 245 |
+
-0.9954046607017517,
|
| 246 |
+
-1.1056499481201172
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.22190109610557557,
|
| 250 |
+
-0.19557971894741058,
|
| 251 |
+
0.02071425139904022,
|
| 252 |
+
0.33727509021759033,
|
| 253 |
+
-0.20722176849842072,
|
| 254 |
+
-0.8324707460403442,
|
| 255 |
+
-0.8625615048408508,
|
| 256 |
+
0.48607451796531675,
|
| 257 |
+
-0.30660848736763,
|
| 258 |
+
-0.9315443730354309,
|
| 259 |
+
0.19041878879070281,
|
| 260 |
+
-0.04380948930978775,
|
| 261 |
+
-0.0050327684171497826,
|
| 262 |
+
-0.05638677150011063,
|
| 263 |
+
-0.26807846426963805,
|
| 264 |
+
-0.9989835453033448,
|
| 265 |
+
-0.329305921792984,
|
| 266 |
+
0.4013799297809601,
|
| 267 |
+
-0.8769975972175598,
|
| 268 |
+
-1.088063154220581
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.6066525983810425,
|
| 272 |
+
0.13703446269035335,
|
| 273 |
+
0.3049686551094055,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.6961594343185422,
|
| 276 |
+
0.0980641171336174,
|
| 277 |
+
0.463529108762741,
|
| 278 |
+
0.9997917461395264,
|
| 279 |
+
0.7787499904632564,
|
| 280 |
+
1.0201601552963255,
|
| 281 |
+
0.3442830562591551,
|
| 282 |
+
0.1231292974948883,
|
| 283 |
+
0.30282683849334713,
|
| 284 |
+
0.9978944087028503,
|
| 285 |
+
0.41937308669090234,
|
| 286 |
+
0.0980641171336174,
|
| 287 |
+
0.27175513744354135,
|
| 288 |
+
0.999885528087616,
|
| 289 |
+
0.5146499085426329,
|
| 290 |
+
1.0030832004547119
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 7145,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_dish_drainer": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.40166160464286804,
|
| 322 |
+
-0.011121422052383423,
|
| 323 |
+
0.18381044268608093,
|
| 324 |
+
0.7193055748939514,
|
| 325 |
+
0.25373342633247375,
|
| 326 |
+
-0.5631368160247803,
|
| 327 |
+
-0.1440804898738861,
|
| 328 |
+
0.8170215487480164,
|
| 329 |
+
0.1759030520915985,
|
| 330 |
+
0.3052484393119812,
|
| 331 |
+
0.27496522665023804,
|
| 332 |
+
0.07536163926124573,
|
| 333 |
+
0.11210401356220245,
|
| 334 |
+
0.5866131782531738,
|
| 335 |
+
0.166164368391037,
|
| 336 |
+
-0.6815540790557861,
|
| 337 |
+
-0.029566073790192604,
|
| 338 |
+
0.9651414752006531,
|
| 339 |
+
0.16927561163902283,
|
| 340 |
+
-0.015535339713096619
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.11050388216972351,
|
| 344 |
+
0.09560802578926086,
|
| 345 |
+
0.07149423658847809,
|
| 346 |
+
0.16429585218429565,
|
| 347 |
+
0.23607663810253143,
|
| 348 |
+
0.13553042709827423,
|
| 349 |
+
0.4136315882205963,
|
| 350 |
+
0.16760703921318054,
|
| 351 |
+
0.28564009070396423,
|
| 352 |
+
0.9522888660430908,
|
| 353 |
+
0.031309936195611954,
|
| 354 |
+
0.04574710130691528,
|
| 355 |
+
0.0856705829501152,
|
| 356 |
+
0.29802361130714417,
|
| 357 |
+
0.15602004528045654,
|
| 358 |
+
0.22492949664592743,
|
| 359 |
+
0.10802315920591354,
|
| 360 |
+
0.041615039110183716,
|
| 361 |
+
0.15993009507656097,
|
| 362 |
+
0.9998806715011597
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.6568294763565063,
|
| 366 |
+
0.20922525227069855,
|
| 367 |
+
0.329291433095932,
|
| 368 |
+
0.9988790154457092,
|
| 369 |
+
0.8221861720085144,
|
| 370 |
+
-0.02126980759203434,
|
| 371 |
+
0.554952085018158,
|
| 372 |
+
0.9999961256980896,
|
| 373 |
+
0.8352594971656799,
|
| 374 |
+
1.0,
|
| 375 |
+
0.3725535273551941,
|
| 376 |
+
0.20133008062839508,
|
| 377 |
+
0.2683204710483551,
|
| 378 |
+
0.9969081878662109,
|
| 379 |
+
0.5947288274765015,
|
| 380 |
+
0.135818213224411,
|
| 381 |
+
0.297533243894577,
|
| 382 |
+
0.9999833106994629,
|
| 383 |
+
0.6284497380256653,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1679762601852417,
|
| 388 |
+
-0.2037276178598404,
|
| 389 |
+
0.026118876412510872,
|
| 390 |
+
0.06734701991081238,
|
| 391 |
+
-0.3303077816963196,
|
| 392 |
+
-0.865761399269104,
|
| 393 |
+
-0.9697803854942322,
|
| 394 |
+
0.24385260045528412,
|
| 395 |
+
-0.3337814211845398,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.17690593004226685,
|
| 398 |
+
-0.019342761486768723,
|
| 399 |
+
-0.045900676399469376,
|
| 400 |
+
-0.08388058096170425,
|
| 401 |
+
-0.1825810670852661,
|
| 402 |
+
-0.9999706149101257,
|
| 403 |
+
-0.4282298684120178,
|
| 404 |
+
0.7756603956222534,
|
| 405 |
+
-0.19046637415885925,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.22041156470775605,
|
| 410 |
+
-0.17958899974822998,
|
| 411 |
+
0.04473079532384872,
|
| 412 |
+
0.2284090793132782,
|
| 413 |
+
-0.2088965356349945,
|
| 414 |
+
-0.811203727722168,
|
| 415 |
+
-0.9306126594543457,
|
| 416 |
+
0.3530711317062378,
|
| 417 |
+
-0.2207678198814392,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.2055837804079056,
|
| 420 |
+
0.005079864375293255,
|
| 421 |
+
-0.04285515695810318,
|
| 422 |
+
0.023393160849809646,
|
| 423 |
+
-0.12909780085086822,
|
| 424 |
+
-0.9969730639457702,
|
| 425 |
+
-0.31871861577033994,
|
| 426 |
+
0.8128526282310485,
|
| 427 |
+
-0.12555764615535736,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.6256548881530761,
|
| 432 |
+
0.16506216526031484,
|
| 433 |
+
0.3053938007354736,
|
| 434 |
+
0.9577780866622918,
|
| 435 |
+
0.7322160029411315,
|
| 436 |
+
-0.22335838973522296,
|
| 437 |
+
0.43161325573921194,
|
| 438 |
+
0.9983374524116516,
|
| 439 |
+
0.7683744573593138,
|
| 440 |
+
1.0,
|
| 441 |
+
0.344862767457962,
|
| 442 |
+
0.19341405749320983,
|
| 443 |
+
0.24164194464683514,
|
| 444 |
+
0.9684402346611023,
|
| 445 |
+
0.5674381494522094,
|
| 446 |
+
-0.24195577383041442,
|
| 447 |
+
0.2379095745086669,
|
| 448 |
+
0.9997554516792297,
|
| 449 |
+
0.564831252098083,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.3995276093482971,
|
| 478 |
+
-0.01743783988058567,
|
| 479 |
+
0.17103439569473267,
|
| 480 |
+
0.7105359435081482,
|
| 481 |
+
0.238600954413414,
|
| 482 |
+
-0.5759879946708679,
|
| 483 |
+
-0.11663730442523956,
|
| 484 |
+
0.8159795999526978,
|
| 485 |
+
0.18427881598472595,
|
| 486 |
+
0.32329148054122925,
|
| 487 |
+
0.26429733633995056,
|
| 488 |
+
0.056828975677490234,
|
| 489 |
+
0.10836688429117203,
|
| 490 |
+
0.5435153841972351,
|
| 491 |
+
0.10053253918886185,
|
| 492 |
+
-0.7011978030204773,
|
| 493 |
+
-0.03383757919073105,
|
| 494 |
+
0.9509052038192749,
|
| 495 |
+
0.0682743564248085,
|
| 496 |
+
-0.11205186694860458
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.10546877980232239,
|
| 500 |
+
0.09407489001750946,
|
| 501 |
+
0.07594858855009079,
|
| 502 |
+
0.15545178949832916,
|
| 503 |
+
0.23370550572872162,
|
| 504 |
+
0.1663108617067337,
|
| 505 |
+
0.417312353849411,
|
| 506 |
+
0.1589633673429489,
|
| 507 |
+
0.29529041051864624,
|
| 508 |
+
0.8386000394821167,
|
| 509 |
+
0.03193666785955429,
|
| 510 |
+
0.03702627867460251,
|
| 511 |
+
0.08499232679605484,
|
| 512 |
+
0.33746665716171265,
|
| 513 |
+
0.13817641139030457,
|
| 514 |
+
0.2642515003681183,
|
| 515 |
+
0.13742685317993164,
|
| 516 |
+
0.10328594595193863,
|
| 517 |
+
0.24581077694892883,
|
| 518 |
+
0.9880411624908447
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.6216338276863098,
|
| 522 |
+
0.1681845635175705,
|
| 523 |
+
0.3582729399204254,
|
| 524 |
+
0.9998778104782104,
|
| 525 |
+
0.7569742202758789,
|
| 526 |
+
0.29317960143089294,
|
| 527 |
+
0.5474420785903931,
|
| 528 |
+
1.0,
|
| 529 |
+
0.9644882678985596,
|
| 530 |
+
1.2399240732192993,
|
| 531 |
+
0.36810019612312317,
|
| 532 |
+
0.15229015052318573,
|
| 533 |
+
0.3755773603916168,
|
| 534 |
+
0.9999530911445618,
|
| 535 |
+
0.47173869609832764,
|
| 536 |
+
0.4396477937698364,
|
| 537 |
+
0.5856077671051025,
|
| 538 |
+
1.0,
|
| 539 |
+
0.9141661524772644,
|
| 540 |
+
1.0335123538970947
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.17779576778411865,
|
| 544 |
+
-0.2223799079656601,
|
| 545 |
+
0.009585360996425152,
|
| 546 |
+
0.27525120973587036,
|
| 547 |
+
-0.3401731848716736,
|
| 548 |
+
-0.8740139603614807,
|
| 549 |
+
-0.922980010509491,
|
| 550 |
+
0.20966650545597076,
|
| 551 |
+
-0.5117865800857544,
|
| 552 |
+
-1.04777991771698,
|
| 553 |
+
0.13721425831317902,
|
| 554 |
+
-0.11607959121465683,
|
| 555 |
+
-0.006126723252236843,
|
| 556 |
+
-0.12117788940668106,
|
| 557 |
+
-0.5865428447723389,
|
| 558 |
+
-0.9999897480010986,
|
| 559 |
+
-0.48856121301651,
|
| 560 |
+
-0.09543908387422562,
|
| 561 |
+
-0.9954046607017517,
|
| 562 |
+
-1.1056499481201172
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.22190109610557557,
|
| 566 |
+
-0.19557971894741058,
|
| 567 |
+
0.02071425139904022,
|
| 568 |
+
0.33727509021759033,
|
| 569 |
+
-0.20722176849842072,
|
| 570 |
+
-0.8324707460403442,
|
| 571 |
+
-0.8625615048408508,
|
| 572 |
+
0.48607451796531675,
|
| 573 |
+
-0.30660848736763,
|
| 574 |
+
-0.9315443730354309,
|
| 575 |
+
0.19041878879070281,
|
| 576 |
+
-0.04380948930978775,
|
| 577 |
+
-0.0050327684171497826,
|
| 578 |
+
-0.05638677150011063,
|
| 579 |
+
-0.26807846426963805,
|
| 580 |
+
-0.9989835453033448,
|
| 581 |
+
-0.329305921792984,
|
| 582 |
+
0.4013799297809601,
|
| 583 |
+
-0.8769975972175598,
|
| 584 |
+
-1.088063154220581
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.6066525983810425,
|
| 588 |
+
0.13703446269035335,
|
| 589 |
+
0.3049686551094055,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.6961594343185422,
|
| 592 |
+
0.0980641171336174,
|
| 593 |
+
0.463529108762741,
|
| 594 |
+
0.9997917461395264,
|
| 595 |
+
0.7787499904632564,
|
| 596 |
+
1.0201601552963255,
|
| 597 |
+
0.3442830562591551,
|
| 598 |
+
0.1231292974948883,
|
| 599 |
+
0.30282683849334713,
|
| 600 |
+
0.9978944087028503,
|
| 601 |
+
0.41937308669090234,
|
| 602 |
+
0.0980641171336174,
|
| 603 |
+
0.27175513744354135,
|
| 604 |
+
0.999885528087616,
|
| 605 |
+
0.5146499085426329,
|
| 606 |
+
1.0030832004547119
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 7145,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
2e-5/twinvla-scratch-aloha_dish_drainer/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6ce8e400165455003d78690e7c4c24eb46ad2b6febf7d1b4396cba4383e28c7
|
| 3 |
+
size 2889536104
|
2e-5/twinvla-scratch-aloha_dish_drainer/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/scratch2/jellyho/rebuttal/singlevla-work/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
2e-5/twinvla-scratch-aloha_handover_box/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
2e-5/twinvla-scratch-aloha_handover_box/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_handover_box": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.3224456310272217,
|
| 6 |
+
-0.07136797159910202,
|
| 7 |
+
0.16350853443145752,
|
| 8 |
+
0.6155848503112793,
|
| 9 |
+
0.02582639828324318,
|
| 10 |
+
-0.6291783452033997,
|
| 11 |
+
-0.12873497605323792,
|
| 12 |
+
0.9684513211250305,
|
| 13 |
+
-0.05324755236506462,
|
| 14 |
+
0.32048356533050537,
|
| 15 |
+
0.3553532660007477,
|
| 16 |
+
-0.01726912148296833,
|
| 17 |
+
0.2535472810268402,
|
| 18 |
+
0.9467610120773315,
|
| 19 |
+
-0.10934814065694809,
|
| 20 |
+
-0.16364224255084991,
|
| 21 |
+
0.1080813780426979,
|
| 22 |
+
0.9652291536331177,
|
| 23 |
+
-0.08225563168525696,
|
| 24 |
+
0.6809535622596741
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.07454729825258255,
|
| 28 |
+
0.08869028091430664,
|
| 29 |
+
0.07996603846549988,
|
| 30 |
+
0.33460596203804016,
|
| 31 |
+
0.19985826313495636,
|
| 32 |
+
0.26947179436683655,
|
| 33 |
+
0.12514576315879822,
|
| 34 |
+
0.030899839475750923,
|
| 35 |
+
0.16146357357501984,
|
| 36 |
+
0.9473047256469727,
|
| 37 |
+
0.064877949655056,
|
| 38 |
+
0.038949884474277496,
|
| 39 |
+
0.027652490884065628,
|
| 40 |
+
0.10490523278713226,
|
| 41 |
+
0.1838432103395462,
|
| 42 |
+
0.14178058505058289,
|
| 43 |
+
0.2033252865076065,
|
| 44 |
+
0.06656654924154282,
|
| 45 |
+
0.06421922147274017,
|
| 46 |
+
0.7322930693626404
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.48683926463127136,
|
| 50 |
+
0.0484432689845562,
|
| 51 |
+
0.31490612030029297,
|
| 52 |
+
0.99891197681427,
|
| 53 |
+
0.4277522563934326,
|
| 54 |
+
0.06322141736745834,
|
| 55 |
+
0.4004654884338379,
|
| 56 |
+
0.9999857544898987,
|
| 57 |
+
0.3100079298019409,
|
| 58 |
+
1.0,
|
| 59 |
+
0.5334027409553528,
|
| 60 |
+
0.08494444936513901,
|
| 61 |
+
0.36568865180015564,
|
| 62 |
+
0.9999882578849792,
|
| 63 |
+
0.2546274662017822,
|
| 64 |
+
0.1172015443444252,
|
| 65 |
+
0.7982608079910278,
|
| 66 |
+
0.9999992251396179,
|
| 67 |
+
0.20094169676303864,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1422317922115326,
|
| 72 |
+
-0.2763901352882385,
|
| 73 |
+
-0.0600760243833065,
|
| 74 |
+
-0.14848311245441437,
|
| 75 |
+
-0.6282482743263245,
|
| 76 |
+
-0.9999129176139832,
|
| 77 |
+
-0.42181891202926636,
|
| 78 |
+
0.7404066324234009,
|
| 79 |
+
-0.6676974296569824,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.1786160171031952,
|
| 82 |
+
-0.1845615804195404,
|
| 83 |
+
0.1687021553516388,
|
| 84 |
+
0.2762398421764374,
|
| 85 |
+
-0.7479667067527771,
|
| 86 |
+
-0.8485982418060303,
|
| 87 |
+
-0.2597721517086029,
|
| 88 |
+
0.6015138626098633,
|
| 89 |
+
-0.3933228552341461,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.1950138956308365,
|
| 94 |
+
-0.24691226959228516,
|
| 95 |
+
-0.015285035967826844,
|
| 96 |
+
-0.04555398792028427,
|
| 97 |
+
-0.4452396559715271,
|
| 98 |
+
-0.996303243637085,
|
| 99 |
+
-0.3760478734970093,
|
| 100 |
+
0.8516808867454528,
|
| 101 |
+
-0.46342918753623963,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.21926841557025908,
|
| 104 |
+
-0.1317625629901886,
|
| 105 |
+
0.1978745412826538,
|
| 106 |
+
0.5117229986190795,
|
| 107 |
+
-0.6376786828041077,
|
| 108 |
+
-0.6609986042976379,
|
| 109 |
+
-0.19099083304405212,
|
| 110 |
+
0.6930621123313904,
|
| 111 |
+
-0.2356126993894577,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.47150796771049497,
|
| 116 |
+
0.038070930540561675,
|
| 117 |
+
0.28182336688041676,
|
| 118 |
+
0.9817836880683899,
|
| 119 |
+
0.3871919810771942,
|
| 120 |
+
-0.1345064049959186,
|
| 121 |
+
0.20285944879054985,
|
| 122 |
+
0.9992118668556214,
|
| 123 |
+
0.2293877118825912,
|
| 124 |
+
1.0,
|
| 125 |
+
0.49810330152511595,
|
| 126 |
+
0.0599309906363487,
|
| 127 |
+
0.3309180569648742,
|
| 128 |
+
0.9995350050926208,
|
| 129 |
+
0.1829529863595952,
|
| 130 |
+
0.03216676786541939,
|
| 131 |
+
0.7132800936698909,
|
| 132 |
+
0.9997488117218017,
|
| 133 |
+
0.08941484957933345,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.3200361728668213,
|
| 162 |
+
-0.06315236538648605,
|
| 163 |
+
0.1539715975522995,
|
| 164 |
+
0.6064432263374329,
|
| 165 |
+
0.06654595583677292,
|
| 166 |
+
-0.6403248906135559,
|
| 167 |
+
-0.09359703212976456,
|
| 168 |
+
0.962713897228241,
|
| 169 |
+
0.0318731926381588,
|
| 170 |
+
0.6514409780502319,
|
| 171 |
+
0.35154759883880615,
|
| 172 |
+
-0.018256481736898422,
|
| 173 |
+
0.23897576332092285,
|
| 174 |
+
0.9405075311660767,
|
| 175 |
+
-0.11245886981487274,
|
| 176 |
+
-0.2019508332014084,
|
| 177 |
+
0.10934195667505264,
|
| 178 |
+
0.9565085172653198,
|
| 179 |
+
-0.0809820145368576,
|
| 180 |
+
0.808542013168335
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.07218372076749802,
|
| 184 |
+
0.07983935624361038,
|
| 185 |
+
0.08212248235940933,
|
| 186 |
+
0.32560113072395325,
|
| 187 |
+
0.15540780127048492,
|
| 188 |
+
0.29600197076797485,
|
| 189 |
+
0.1470419317483902,
|
| 190 |
+
0.06898750364780426,
|
| 191 |
+
0.19242025911808014,
|
| 192 |
+
0.4760681986808777,
|
| 193 |
+
0.06400060653686523,
|
| 194 |
+
0.03751807287335396,
|
| 195 |
+
0.0323367603123188,
|
| 196 |
+
0.10509958118200302,
|
| 197 |
+
0.17797957360744476,
|
| 198 |
+
0.13889151811599731,
|
| 199 |
+
0.20048993825912476,
|
| 200 |
+
0.08273555338382721,
|
| 201 |
+
0.13973523676395416,
|
| 202 |
+
0.40020158886909485
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.47570154070854187,
|
| 206 |
+
0.08932404220104218,
|
| 207 |
+
0.44513142108917236,
|
| 208 |
+
0.9999915361404419,
|
| 209 |
+
0.6316148042678833,
|
| 210 |
+
0.7311769127845764,
|
| 211 |
+
0.5646719932556152,
|
| 212 |
+
1.0,
|
| 213 |
+
0.9345466494560242,
|
| 214 |
+
1.3299691677093506,
|
| 215 |
+
0.5250220894813538,
|
| 216 |
+
0.07912999391555786,
|
| 217 |
+
0.41775044798851013,
|
| 218 |
+
0.9999979138374329,
|
| 219 |
+
0.2288104146718979,
|
| 220 |
+
0.2556033134460449,
|
| 221 |
+
0.7930954098701477,
|
| 222 |
+
1.0,
|
| 223 |
+
0.8460071086883545,
|
| 224 |
+
1.1448447704315186
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.15507374703884125,
|
| 228 |
+
-0.24968452751636505,
|
| 229 |
+
-0.005626574158668518,
|
| 230 |
+
-0.12249666452407837,
|
| 231 |
+
-0.3874700665473938,
|
| 232 |
+
-1.0,
|
| 233 |
+
-0.8481224179267883,
|
| 234 |
+
0.28493279218673706,
|
| 235 |
+
-0.8170893788337708,
|
| 236 |
+
-1.083611011505127,
|
| 237 |
+
0.18484443426132202,
|
| 238 |
+
-0.1679670214653015,
|
| 239 |
+
0.1543029397726059,
|
| 240 |
+
0.2590605616569519,
|
| 241 |
+
-0.7203781604766846,
|
| 242 |
+
-0.8606433272361755,
|
| 243 |
+
-0.2443554699420929,
|
| 244 |
+
0.2216777801513672,
|
| 245 |
+
-0.9731146693229675,
|
| 246 |
+
-1.0848060846328735
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.1903739631175995,
|
| 250 |
+
-0.22257488489151,
|
| 251 |
+
-0.0036250025033950804,
|
| 252 |
+
-0.015333320312201977,
|
| 253 |
+
-0.2553225290775299,
|
| 254 |
+
-0.9997995805740356,
|
| 255 |
+
-0.3545967137813568,
|
| 256 |
+
0.6295642066001892,
|
| 257 |
+
-0.32733017563819883,
|
| 258 |
+
-0.4065189242362976,
|
| 259 |
+
0.22028838396072387,
|
| 260 |
+
-0.1278022611141205,
|
| 261 |
+
0.17875114858150482,
|
| 262 |
+
0.488557243347168,
|
| 263 |
+
-0.6262442255020142,
|
| 264 |
+
-0.6858670902252197,
|
| 265 |
+
-0.17815817892551422,
|
| 266 |
+
0.6348884439468384,
|
| 267 |
+
-0.5856496715545654,
|
| 268 |
+
-0.4086606001853943
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.4643457818031311,
|
| 272 |
+
0.05302721098065367,
|
| 273 |
+
0.32663319587707507,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.426870135068893,
|
| 276 |
+
0.18705489814281454,
|
| 277 |
+
0.3631119978427884,
|
| 278 |
+
0.9999364447593689,
|
| 279 |
+
0.7475578069686883,
|
| 280 |
+
1.178509011268615,
|
| 281 |
+
0.4939642870426177,
|
| 282 |
+
0.051381030380725806,
|
| 283 |
+
0.3385275864601135,
|
| 284 |
+
0.999157931804657,
|
| 285 |
+
0.16684140086173982,
|
| 286 |
+
0.05098062053322772,
|
| 287 |
+
0.7065742087364195,
|
| 288 |
+
0.9998370099067688,
|
| 289 |
+
0.5137611627578699,
|
| 290 |
+
1.0447997903823851
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 11829,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_handover_box_new": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.3224456310272217,
|
| 322 |
+
-0.07136797159910202,
|
| 323 |
+
0.16350853443145752,
|
| 324 |
+
0.6155848503112793,
|
| 325 |
+
0.02582639828324318,
|
| 326 |
+
-0.6291783452033997,
|
| 327 |
+
-0.12873497605323792,
|
| 328 |
+
0.9684513211250305,
|
| 329 |
+
-0.05324755236506462,
|
| 330 |
+
0.32048356533050537,
|
| 331 |
+
0.3553532660007477,
|
| 332 |
+
-0.01726912148296833,
|
| 333 |
+
0.2535472810268402,
|
| 334 |
+
0.9467610120773315,
|
| 335 |
+
-0.10934814065694809,
|
| 336 |
+
-0.16364224255084991,
|
| 337 |
+
0.1080813780426979,
|
| 338 |
+
0.9652291536331177,
|
| 339 |
+
-0.08225563168525696,
|
| 340 |
+
0.6809535622596741
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.07454729825258255,
|
| 344 |
+
0.08869028091430664,
|
| 345 |
+
0.07996603846549988,
|
| 346 |
+
0.33460596203804016,
|
| 347 |
+
0.19985826313495636,
|
| 348 |
+
0.26947179436683655,
|
| 349 |
+
0.12514576315879822,
|
| 350 |
+
0.030899839475750923,
|
| 351 |
+
0.16146357357501984,
|
| 352 |
+
0.9473047256469727,
|
| 353 |
+
0.064877949655056,
|
| 354 |
+
0.038949884474277496,
|
| 355 |
+
0.027652490884065628,
|
| 356 |
+
0.10490523278713226,
|
| 357 |
+
0.1838432103395462,
|
| 358 |
+
0.14178058505058289,
|
| 359 |
+
0.2033252865076065,
|
| 360 |
+
0.06656654924154282,
|
| 361 |
+
0.06421922147274017,
|
| 362 |
+
0.7322930693626404
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.48683926463127136,
|
| 366 |
+
0.0484432689845562,
|
| 367 |
+
0.31490612030029297,
|
| 368 |
+
0.99891197681427,
|
| 369 |
+
0.4277522563934326,
|
| 370 |
+
0.06322141736745834,
|
| 371 |
+
0.4004654884338379,
|
| 372 |
+
0.9999857544898987,
|
| 373 |
+
0.3100079298019409,
|
| 374 |
+
1.0,
|
| 375 |
+
0.5334027409553528,
|
| 376 |
+
0.08494444936513901,
|
| 377 |
+
0.36568865180015564,
|
| 378 |
+
0.9999882578849792,
|
| 379 |
+
0.2546274662017822,
|
| 380 |
+
0.1172015443444252,
|
| 381 |
+
0.7982608079910278,
|
| 382 |
+
0.9999992251396179,
|
| 383 |
+
0.20094169676303864,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1422317922115326,
|
| 388 |
+
-0.2763901352882385,
|
| 389 |
+
-0.0600760243833065,
|
| 390 |
+
-0.14848311245441437,
|
| 391 |
+
-0.6282482743263245,
|
| 392 |
+
-0.9999129176139832,
|
| 393 |
+
-0.42181891202926636,
|
| 394 |
+
0.7404066324234009,
|
| 395 |
+
-0.6676974296569824,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.1786160171031952,
|
| 398 |
+
-0.1845615804195404,
|
| 399 |
+
0.1687021553516388,
|
| 400 |
+
0.2762398421764374,
|
| 401 |
+
-0.7479667067527771,
|
| 402 |
+
-0.8485982418060303,
|
| 403 |
+
-0.2597721517086029,
|
| 404 |
+
0.6015138626098633,
|
| 405 |
+
-0.3933228552341461,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.1950138956308365,
|
| 410 |
+
-0.24691226959228516,
|
| 411 |
+
-0.015285035967826844,
|
| 412 |
+
-0.04555398792028427,
|
| 413 |
+
-0.4452396559715271,
|
| 414 |
+
-0.996303243637085,
|
| 415 |
+
-0.3760478734970093,
|
| 416 |
+
0.8516808867454528,
|
| 417 |
+
-0.46342918753623963,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.21926841557025908,
|
| 420 |
+
-0.1317625629901886,
|
| 421 |
+
0.1978745412826538,
|
| 422 |
+
0.5117229986190795,
|
| 423 |
+
-0.6376786828041077,
|
| 424 |
+
-0.6609986042976379,
|
| 425 |
+
-0.19099083304405212,
|
| 426 |
+
0.6930621123313904,
|
| 427 |
+
-0.2356126993894577,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.47150796771049497,
|
| 432 |
+
0.038070930540561675,
|
| 433 |
+
0.28182336688041676,
|
| 434 |
+
0.9817836880683899,
|
| 435 |
+
0.3871919810771942,
|
| 436 |
+
-0.1345064049959186,
|
| 437 |
+
0.20285944879054985,
|
| 438 |
+
0.9992118668556214,
|
| 439 |
+
0.2293877118825912,
|
| 440 |
+
1.0,
|
| 441 |
+
0.49810330152511595,
|
| 442 |
+
0.0599309906363487,
|
| 443 |
+
0.3309180569648742,
|
| 444 |
+
0.9995350050926208,
|
| 445 |
+
0.1829529863595952,
|
| 446 |
+
0.03216676786541939,
|
| 447 |
+
0.7132800936698909,
|
| 448 |
+
0.9997488117218017,
|
| 449 |
+
0.08941484957933345,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.3200361728668213,
|
| 478 |
+
-0.06315236538648605,
|
| 479 |
+
0.1539715975522995,
|
| 480 |
+
0.6064432263374329,
|
| 481 |
+
0.06654595583677292,
|
| 482 |
+
-0.6403248906135559,
|
| 483 |
+
-0.09359703212976456,
|
| 484 |
+
0.962713897228241,
|
| 485 |
+
0.0318731926381588,
|
| 486 |
+
0.6514409780502319,
|
| 487 |
+
0.35154759883880615,
|
| 488 |
+
-0.018256481736898422,
|
| 489 |
+
0.23897576332092285,
|
| 490 |
+
0.9405075311660767,
|
| 491 |
+
-0.11245886981487274,
|
| 492 |
+
-0.2019508332014084,
|
| 493 |
+
0.10934195667505264,
|
| 494 |
+
0.9565085172653198,
|
| 495 |
+
-0.0809820145368576,
|
| 496 |
+
0.808542013168335
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.07218372076749802,
|
| 500 |
+
0.07983935624361038,
|
| 501 |
+
0.08212248235940933,
|
| 502 |
+
0.32560113072395325,
|
| 503 |
+
0.15540780127048492,
|
| 504 |
+
0.29600197076797485,
|
| 505 |
+
0.1470419317483902,
|
| 506 |
+
0.06898750364780426,
|
| 507 |
+
0.19242025911808014,
|
| 508 |
+
0.4760681986808777,
|
| 509 |
+
0.06400060653686523,
|
| 510 |
+
0.03751807287335396,
|
| 511 |
+
0.0323367603123188,
|
| 512 |
+
0.10509958118200302,
|
| 513 |
+
0.17797957360744476,
|
| 514 |
+
0.13889151811599731,
|
| 515 |
+
0.20048993825912476,
|
| 516 |
+
0.08273555338382721,
|
| 517 |
+
0.13973523676395416,
|
| 518 |
+
0.40020158886909485
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.47570154070854187,
|
| 522 |
+
0.08932404220104218,
|
| 523 |
+
0.44513142108917236,
|
| 524 |
+
0.9999915361404419,
|
| 525 |
+
0.6316148042678833,
|
| 526 |
+
0.7311769127845764,
|
| 527 |
+
0.5646719932556152,
|
| 528 |
+
1.0,
|
| 529 |
+
0.9345466494560242,
|
| 530 |
+
1.3299691677093506,
|
| 531 |
+
0.5250220894813538,
|
| 532 |
+
0.07912999391555786,
|
| 533 |
+
0.41775044798851013,
|
| 534 |
+
0.9999979138374329,
|
| 535 |
+
0.2288104146718979,
|
| 536 |
+
0.2556033134460449,
|
| 537 |
+
0.7930954098701477,
|
| 538 |
+
1.0,
|
| 539 |
+
0.8460071086883545,
|
| 540 |
+
1.1448447704315186
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.15507374703884125,
|
| 544 |
+
-0.24968452751636505,
|
| 545 |
+
-0.005626574158668518,
|
| 546 |
+
-0.12249666452407837,
|
| 547 |
+
-0.3874700665473938,
|
| 548 |
+
-1.0,
|
| 549 |
+
-0.8481224179267883,
|
| 550 |
+
0.28493279218673706,
|
| 551 |
+
-0.8170893788337708,
|
| 552 |
+
-1.083611011505127,
|
| 553 |
+
0.18484443426132202,
|
| 554 |
+
-0.1679670214653015,
|
| 555 |
+
0.1543029397726059,
|
| 556 |
+
0.2590605616569519,
|
| 557 |
+
-0.7203781604766846,
|
| 558 |
+
-0.8606433272361755,
|
| 559 |
+
-0.2443554699420929,
|
| 560 |
+
0.2216777801513672,
|
| 561 |
+
-0.9731146693229675,
|
| 562 |
+
-1.0848060846328735
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.1903739631175995,
|
| 566 |
+
-0.22257488489151,
|
| 567 |
+
-0.0036250025033950804,
|
| 568 |
+
-0.015333320312201977,
|
| 569 |
+
-0.2553225290775299,
|
| 570 |
+
-0.9997995805740356,
|
| 571 |
+
-0.3545967137813568,
|
| 572 |
+
0.6295642066001892,
|
| 573 |
+
-0.32733017563819883,
|
| 574 |
+
-0.4065189242362976,
|
| 575 |
+
0.22028838396072387,
|
| 576 |
+
-0.1278022611141205,
|
| 577 |
+
0.17875114858150482,
|
| 578 |
+
0.488557243347168,
|
| 579 |
+
-0.6262442255020142,
|
| 580 |
+
-0.6858670902252197,
|
| 581 |
+
-0.17815817892551422,
|
| 582 |
+
0.6348884439468384,
|
| 583 |
+
-0.5856496715545654,
|
| 584 |
+
-0.4086606001853943
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.4643457818031311,
|
| 588 |
+
0.05302721098065367,
|
| 589 |
+
0.32663319587707507,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.426870135068893,
|
| 592 |
+
0.18705489814281454,
|
| 593 |
+
0.3631119978427884,
|
| 594 |
+
0.9999364447593689,
|
| 595 |
+
0.7475578069686883,
|
| 596 |
+
1.178509011268615,
|
| 597 |
+
0.4939642870426177,
|
| 598 |
+
0.051381030380725806,
|
| 599 |
+
0.3385275864601135,
|
| 600 |
+
0.999157931804657,
|
| 601 |
+
0.16684140086173982,
|
| 602 |
+
0.05098062053322772,
|
| 603 |
+
0.7065742087364195,
|
| 604 |
+
0.9998370099067688,
|
| 605 |
+
0.5137611627578699,
|
| 606 |
+
1.0447997903823851
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 11829,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
2e-5/twinvla-scratch-aloha_handover_box/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac71b26bd439527bce49753635984d136ea90a4514ce78738b77fe82a090911
|
| 3 |
+
size 2889536104
|
2e-5/twinvla-scratch-aloha_handover_box/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
2e-5/twinvla-scratch-aloha_handover_box/training_states.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3272c2fbc20d5db43220008c4f3ee2ac707e606bf35d2cd829e5c5feda24abbc
|
| 3 |
+
size 4126124658
|
2e-5/twinvla-scratch-aloha_lift_box/config.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"action_dim": 10,
|
| 3 |
+
"action_head": "DiT",
|
| 4 |
+
"action_len": 20,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Eagle2_1BTwinVLA"
|
| 7 |
+
],
|
| 8 |
+
"attn_reweighting": true,
|
| 9 |
+
"denoiser": "FM",
|
| 10 |
+
"dit_scratch": false,
|
| 11 |
+
"global_normalization": true,
|
| 12 |
+
"hz_interpolate": null,
|
| 13 |
+
"interpolate_gripper": false,
|
| 14 |
+
"knowledge_insulation": false,
|
| 15 |
+
"model_path": null,
|
| 16 |
+
"model_type": "Eagle2_1BTwinVLA",
|
| 17 |
+
"modeling": "denoising",
|
| 18 |
+
"normalization": "quantile",
|
| 19 |
+
"num_readouts": 1,
|
| 20 |
+
"readout_token_as_eos": true,
|
| 21 |
+
"share_decoder": true,
|
| 22 |
+
"share_embed_tokens": true,
|
| 23 |
+
"share_vision": true,
|
| 24 |
+
"singlevla_config": {
|
| 25 |
+
"_attn_implementation_autoset": false,
|
| 26 |
+
"_attn_implementation_internal": null,
|
| 27 |
+
"_commit_hash": null,
|
| 28 |
+
"_name_or_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 29 |
+
"action_dim": 10,
|
| 30 |
+
"action_head": "DiT",
|
| 31 |
+
"action_head_hidden_dim": 1024,
|
| 32 |
+
"action_len": 20,
|
| 33 |
+
"add_cross_attention": false,
|
| 34 |
+
"aggregation": "None",
|
| 35 |
+
"architectures": [
|
| 36 |
+
"Eagle2_1BVLA"
|
| 37 |
+
],
|
| 38 |
+
"auto_map": {},
|
| 39 |
+
"bad_words_ids": null,
|
| 40 |
+
"begin_suppress_tokens": null,
|
| 41 |
+
"bos_token_id": null,
|
| 42 |
+
"chunk_size_feed_forward": 0,
|
| 43 |
+
"cross_attention_hidden_size": null,
|
| 44 |
+
"decoder_start_token_id": null,
|
| 45 |
+
"denoiser": "FM",
|
| 46 |
+
"diffusion_batch": 32,
|
| 47 |
+
"dit_size": "DiT-B",
|
| 48 |
+
"diversity_penalty": 0.0,
|
| 49 |
+
"do_sample": false,
|
| 50 |
+
"downsample_ratio": 0.5,
|
| 51 |
+
"dynamic_image_size": true,
|
| 52 |
+
"early_stopping": false,
|
| 53 |
+
"efficient_loss": true,
|
| 54 |
+
"enable_cfg": true,
|
| 55 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 56 |
+
"eos_token_id": null,
|
| 57 |
+
"exponential_decay_length_penalty": null,
|
| 58 |
+
"finetuning_task": null,
|
| 59 |
+
"force_image_size": 448,
|
| 60 |
+
"forced_bos_token_id": null,
|
| 61 |
+
"forced_eos_token_id": null,
|
| 62 |
+
"global_normalization": true,
|
| 63 |
+
"id2label": {
|
| 64 |
+
"0": "LABEL_0",
|
| 65 |
+
"1": "LABEL_1"
|
| 66 |
+
},
|
| 67 |
+
"image_size": 448,
|
| 68 |
+
"is_decoder": false,
|
| 69 |
+
"is_encoder_decoder": false,
|
| 70 |
+
"keep_aspect_ratio": false,
|
| 71 |
+
"knowledge_insulation": false,
|
| 72 |
+
"label2id": {
|
| 73 |
+
"LABEL_0": 0,
|
| 74 |
+
"LABEL_1": 1
|
| 75 |
+
},
|
| 76 |
+
"length_penalty": 1.0,
|
| 77 |
+
"llm_config": {
|
| 78 |
+
"_attn_implementation_autoset": true,
|
| 79 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 80 |
+
"add_cross_attention": false,
|
| 81 |
+
"architectures": [
|
| 82 |
+
"Qwen2ForCausalLM"
|
| 83 |
+
],
|
| 84 |
+
"attention_dropout": 0.0,
|
| 85 |
+
"auto_map": {
|
| 86 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 87 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 88 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 89 |
+
},
|
| 90 |
+
"bad_words_ids": null,
|
| 91 |
+
"begin_suppress_tokens": null,
|
| 92 |
+
"bos_token_id": 151643,
|
| 93 |
+
"chunk_size_feed_forward": 0,
|
| 94 |
+
"cross_attention_hidden_size": null,
|
| 95 |
+
"decoder_start_token_id": null,
|
| 96 |
+
"diversity_penalty": 0.0,
|
| 97 |
+
"do_sample": false,
|
| 98 |
+
"early_stopping": false,
|
| 99 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 100 |
+
"eos_token_id": 151645,
|
| 101 |
+
"exponential_decay_length_penalty": null,
|
| 102 |
+
"finetuning_task": null,
|
| 103 |
+
"forced_bos_token_id": null,
|
| 104 |
+
"forced_eos_token_id": null,
|
| 105 |
+
"hidden_act": "silu",
|
| 106 |
+
"hidden_size": 896,
|
| 107 |
+
"id2label": {
|
| 108 |
+
"0": "LABEL_0",
|
| 109 |
+
"1": "LABEL_1"
|
| 110 |
+
},
|
| 111 |
+
"initializer_range": 0.02,
|
| 112 |
+
"intermediate_size": 4864,
|
| 113 |
+
"is_decoder": false,
|
| 114 |
+
"is_encoder_decoder": false,
|
| 115 |
+
"label2id": {
|
| 116 |
+
"LABEL_0": 0,
|
| 117 |
+
"LABEL_1": 1
|
| 118 |
+
},
|
| 119 |
+
"length_penalty": 1.0,
|
| 120 |
+
"max_length": 20,
|
| 121 |
+
"max_position_embeddings": 32768,
|
| 122 |
+
"max_window_layers": 21,
|
| 123 |
+
"min_length": 0,
|
| 124 |
+
"model_type": "qwen2",
|
| 125 |
+
"no_repeat_ngram_size": 0,
|
| 126 |
+
"num_attention_heads": 14,
|
| 127 |
+
"num_beam_groups": 1,
|
| 128 |
+
"num_beams": 1,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_key_value_heads": 2,
|
| 131 |
+
"num_return_sequences": 1,
|
| 132 |
+
"output_attentions": false,
|
| 133 |
+
"output_hidden_states": false,
|
| 134 |
+
"output_scores": false,
|
| 135 |
+
"pad_token_id": null,
|
| 136 |
+
"prefix": null,
|
| 137 |
+
"problem_type": null,
|
| 138 |
+
"pruned_heads": {},
|
| 139 |
+
"remove_invalid_values": false,
|
| 140 |
+
"repetition_penalty": 1.0,
|
| 141 |
+
"return_dict": true,
|
| 142 |
+
"return_dict_in_generate": false,
|
| 143 |
+
"rms_norm_eps": 1e-06,
|
| 144 |
+
"rope_scaling": null,
|
| 145 |
+
"rope_theta": 1000000.0,
|
| 146 |
+
"sep_token_id": null,
|
| 147 |
+
"sliding_window": 32768,
|
| 148 |
+
"suppress_tokens": null,
|
| 149 |
+
"task_specific_params": null,
|
| 150 |
+
"temperature": 1.0,
|
| 151 |
+
"tf_legacy_loss": false,
|
| 152 |
+
"tie_encoder_decoder": false,
|
| 153 |
+
"tie_word_embeddings": true,
|
| 154 |
+
"tokenizer_class": null,
|
| 155 |
+
"top_k": 50,
|
| 156 |
+
"top_p": 1.0,
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"torchscript": false,
|
| 159 |
+
"transformers_version": "4.50.0.dev0",
|
| 160 |
+
"typical_p": 1.0,
|
| 161 |
+
"use_bfloat16": false,
|
| 162 |
+
"use_cache": false,
|
| 163 |
+
"use_sliding_window": false,
|
| 164 |
+
"vocab_size": 151674
|
| 165 |
+
},
|
| 166 |
+
"loss_version": "v4",
|
| 167 |
+
"max_dynamic_patch": 12,
|
| 168 |
+
"max_length": 20,
|
| 169 |
+
"min_dynamic_patch": 1,
|
| 170 |
+
"min_length": 0,
|
| 171 |
+
"mlp_checkpoint": true,
|
| 172 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 173 |
+
"model_type": "Eagle2_1BVLA",
|
| 174 |
+
"modeling": "denoising",
|
| 175 |
+
"no_repeat_ngram_size": 0,
|
| 176 |
+
"normalization": "quantile",
|
| 177 |
+
"num_beam_groups": 1,
|
| 178 |
+
"num_beams": 1,
|
| 179 |
+
"num_readouts": 1,
|
| 180 |
+
"num_return_sequences": 1,
|
| 181 |
+
"output_attentions": false,
|
| 182 |
+
"output_hidden_states": false,
|
| 183 |
+
"output_scores": false,
|
| 184 |
+
"pad2square": false,
|
| 185 |
+
"pad_token_id": null,
|
| 186 |
+
"pre_feature_reduction": false,
|
| 187 |
+
"prefix": null,
|
| 188 |
+
"problem_type": null,
|
| 189 |
+
"pruned_heads": {},
|
| 190 |
+
"ps_version": "v2",
|
| 191 |
+
"readout_token_as_eos": true,
|
| 192 |
+
"remove_invalid_values": false,
|
| 193 |
+
"repetition_penalty": 1.0,
|
| 194 |
+
"return_dict": true,
|
| 195 |
+
"return_dict_in_generate": false,
|
| 196 |
+
"return_text": null,
|
| 197 |
+
"select_layer": -1,
|
| 198 |
+
"sep_token_id": null,
|
| 199 |
+
"state_dim": 10,
|
| 200 |
+
"stopping_token": "|",
|
| 201 |
+
"suppress_tokens": null,
|
| 202 |
+
"task_specific_params": null,
|
| 203 |
+
"temperature": 1.0,
|
| 204 |
+
"template": "qwen2-chat",
|
| 205 |
+
"test_denoising_steps": 10,
|
| 206 |
+
"tf_legacy_loss": false,
|
| 207 |
+
"tie_encoder_decoder": false,
|
| 208 |
+
"tie_word_embeddings": true,
|
| 209 |
+
"tokenizer_class": null,
|
| 210 |
+
"top_k": 50,
|
| 211 |
+
"top_p": 1.0,
|
| 212 |
+
"torch_dtype": "bfloat16",
|
| 213 |
+
"torchscript": false,
|
| 214 |
+
"train_denoising_steps": 100,
|
| 215 |
+
"typical_p": 1.0,
|
| 216 |
+
"use_backbone_lora": 0,
|
| 217 |
+
"use_bfloat16": false,
|
| 218 |
+
"use_llm_lora": 0,
|
| 219 |
+
"use_thumbnail": true,
|
| 220 |
+
"vision_config": {
|
| 221 |
+
"_attn_implementation_autoset": true,
|
| 222 |
+
"_name_or_path": "",
|
| 223 |
+
"add_cross_attention": false,
|
| 224 |
+
"architectures": [
|
| 225 |
+
"SiglipVisionModel"
|
| 226 |
+
],
|
| 227 |
+
"attention_dropout": 0.0,
|
| 228 |
+
"auto_map": {
|
| 229 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 230 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 231 |
+
},
|
| 232 |
+
"bad_words_ids": null,
|
| 233 |
+
"begin_suppress_tokens": null,
|
| 234 |
+
"bos_token_id": null,
|
| 235 |
+
"chunk_size_feed_forward": 0,
|
| 236 |
+
"cross_attention_hidden_size": null,
|
| 237 |
+
"decoder_start_token_id": null,
|
| 238 |
+
"diversity_penalty": 0.0,
|
| 239 |
+
"do_sample": false,
|
| 240 |
+
"drop_path_rate": 0.1,
|
| 241 |
+
"early_stopping": false,
|
| 242 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 243 |
+
"eos_token_id": null,
|
| 244 |
+
"exponential_decay_length_penalty": null,
|
| 245 |
+
"finetuning_task": null,
|
| 246 |
+
"forced_bos_token_id": null,
|
| 247 |
+
"forced_eos_token_id": null,
|
| 248 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 249 |
+
"hidden_size": 1152,
|
| 250 |
+
"id2label": {
|
| 251 |
+
"0": "LABEL_0",
|
| 252 |
+
"1": "LABEL_1"
|
| 253 |
+
},
|
| 254 |
+
"image_size": 448,
|
| 255 |
+
"intermediate_size": 4304,
|
| 256 |
+
"is_decoder": false,
|
| 257 |
+
"is_encoder_decoder": false,
|
| 258 |
+
"label2id": {
|
| 259 |
+
"LABEL_0": 0,
|
| 260 |
+
"LABEL_1": 1
|
| 261 |
+
},
|
| 262 |
+
"layer_norm_eps": 1e-06,
|
| 263 |
+
"length_penalty": 1.0,
|
| 264 |
+
"max_length": 20,
|
| 265 |
+
"min_length": 0,
|
| 266 |
+
"model_type": "siglip_vision_model",
|
| 267 |
+
"no_repeat_ngram_size": 0,
|
| 268 |
+
"num_attention_heads": 16,
|
| 269 |
+
"num_beam_groups": 1,
|
| 270 |
+
"num_beams": 1,
|
| 271 |
+
"num_channels": 3,
|
| 272 |
+
"num_hidden_layers": 27,
|
| 273 |
+
"num_image_tokens": 1024,
|
| 274 |
+
"num_return_sequences": 1,
|
| 275 |
+
"output_attentions": false,
|
| 276 |
+
"output_hidden_states": false,
|
| 277 |
+
"output_scores": false,
|
| 278 |
+
"pad_token_id": null,
|
| 279 |
+
"patch_size": 14,
|
| 280 |
+
"prefix": null,
|
| 281 |
+
"problem_type": null,
|
| 282 |
+
"projection_dim": 2048,
|
| 283 |
+
"projector_hidden_act": "gelu_fast",
|
| 284 |
+
"pruned_heads": {},
|
| 285 |
+
"remove_invalid_values": false,
|
| 286 |
+
"repetition_penalty": 1.0,
|
| 287 |
+
"return_dict": true,
|
| 288 |
+
"return_dict_in_generate": false,
|
| 289 |
+
"sep_token_id": null,
|
| 290 |
+
"suppress_tokens": null,
|
| 291 |
+
"task_specific_params": null,
|
| 292 |
+
"temperature": 1.0,
|
| 293 |
+
"tf_legacy_loss": false,
|
| 294 |
+
"tie_encoder_decoder": false,
|
| 295 |
+
"tie_word_embeddings": true,
|
| 296 |
+
"tokenizer_class": null,
|
| 297 |
+
"top_k": 50,
|
| 298 |
+
"top_p": 1.0,
|
| 299 |
+
"torch_dtype": "bfloat16",
|
| 300 |
+
"torchscript": false,
|
| 301 |
+
"transformers_version": "4.50.0.dev0",
|
| 302 |
+
"typical_p": 1.0,
|
| 303 |
+
"use_bfloat16": false,
|
| 304 |
+
"vision_use_head": false
|
| 305 |
+
},
|
| 306 |
+
"vocab_size": 151674,
|
| 307 |
+
"vocab_start": null
|
| 308 |
+
},
|
| 309 |
+
"singlevla_config_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 310 |
+
"singlevla_pretrained_path": null,
|
| 311 |
+
"state_dim": 10,
|
| 312 |
+
"torch_dtype": "bfloat16",
|
| 313 |
+
"transformers_version": "4.50.0.dev0"
|
| 314 |
+
}
|
2e-5/twinvla-scratch-aloha_lift_box/dataset_statistics.json
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aloha_lift_box": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.36333414912223816,
|
| 6 |
+
-0.018824385479092598,
|
| 7 |
+
0.17903447151184082,
|
| 8 |
+
0.40831902623176575,
|
| 9 |
+
-0.11689134687185287,
|
| 10 |
+
-0.8073355555534363,
|
| 11 |
+
-0.10573221743106842,
|
| 12 |
+
0.9415335059165955,
|
| 13 |
+
-0.1624741405248642,
|
| 14 |
+
0.619253396987915,
|
| 15 |
+
0.360858291387558,
|
| 16 |
+
0.013982057571411133,
|
| 17 |
+
0.20412704348564148,
|
| 18 |
+
0.5001598596572876,
|
| 19 |
+
0.1113751009106636,
|
| 20 |
+
-0.7415224313735962,
|
| 21 |
+
0.052125416696071625,
|
| 22 |
+
0.9483603239059448,
|
| 23 |
+
0.16254939138889313,
|
| 24 |
+
0.7590736150741577
|
| 25 |
+
],
|
| 26 |
+
"std": [
|
| 27 |
+
0.0638059601187706,
|
| 28 |
+
0.06317952275276184,
|
| 29 |
+
0.11073730885982513,
|
| 30 |
+
0.31736457347869873,
|
| 31 |
+
0.13928908109664917,
|
| 32 |
+
0.2184142768383026,
|
| 33 |
+
0.22394300997257233,
|
| 34 |
+
0.08008279651403427,
|
| 35 |
+
0.13921108841896057,
|
| 36 |
+
0.785220742225647,
|
| 37 |
+
0.05687877535820007,
|
| 38 |
+
0.059404969215393066,
|
| 39 |
+
0.1170634776353836,
|
| 40 |
+
0.3238433599472046,
|
| 41 |
+
0.14080215990543365,
|
| 42 |
+
0.2507486343383789,
|
| 43 |
+
0.21644321084022522,
|
| 44 |
+
0.07544828206300735,
|
| 45 |
+
0.1375824511051178,
|
| 46 |
+
0.6510355472564697
|
| 47 |
+
],
|
| 48 |
+
"max": [
|
| 49 |
+
0.5681452751159668,
|
| 50 |
+
0.2437673658132553,
|
| 51 |
+
0.45541316270828247,
|
| 52 |
+
0.9999293088912964,
|
| 53 |
+
0.523757815361023,
|
| 54 |
+
0.4592168927192688,
|
| 55 |
+
0.7756927013397217,
|
| 56 |
+
0.9999935030937195,
|
| 57 |
+
0.2805824279785156,
|
| 58 |
+
1.0,
|
| 59 |
+
0.5600330233573914,
|
| 60 |
+
0.3342031240463257,
|
| 61 |
+
0.4682213366031647,
|
| 62 |
+
0.9998393058776855,
|
| 63 |
+
0.7949740886688232,
|
| 64 |
+
0.1664249449968338,
|
| 65 |
+
0.9131186604499817,
|
| 66 |
+
0.9999967813491821,
|
| 67 |
+
0.7936055064201355,
|
| 68 |
+
1.0
|
| 69 |
+
],
|
| 70 |
+
"min": [
|
| 71 |
+
0.1517709195613861,
|
| 72 |
+
-0.2900286316871643,
|
| 73 |
+
-0.07412093877792358,
|
| 74 |
+
-0.4022133946418762,
|
| 75 |
+
-0.7361933588981628,
|
| 76 |
+
-0.9999988079071045,
|
| 77 |
+
-0.9935019016265869,
|
| 78 |
+
0.10709662735462189,
|
| 79 |
+
-0.8023554682731628,
|
| 80 |
+
-1.0,
|
| 81 |
+
0.15366072952747345,
|
| 82 |
+
-0.23686714470386505,
|
| 83 |
+
0.0008372184820473194,
|
| 84 |
+
-0.5509981513023376,
|
| 85 |
+
-0.35234102606773376,
|
| 86 |
+
-0.999956488609314,
|
| 87 |
+
-0.5318384766578674,
|
| 88 |
+
0.3388061225414276,
|
| 89 |
+
-0.27330997586250305,
|
| 90 |
+
-1.0
|
| 91 |
+
],
|
| 92 |
+
"q01": [
|
| 93 |
+
0.21054682105779648,
|
| 94 |
+
-0.1866426882147789,
|
| 95 |
+
0.008138886513188495,
|
| 96 |
+
-0.19710821226239203,
|
| 97 |
+
-0.5368945515155792,
|
| 98 |
+
-0.9981186389923096,
|
| 99 |
+
-0.6956261324882507,
|
| 100 |
+
0.6267582887411117,
|
| 101 |
+
-0.5600040704011917,
|
| 102 |
+
-1.0,
|
| 103 |
+
0.2190245844423771,
|
| 104 |
+
-0.15968348175287247,
|
| 105 |
+
0.025033411756157874,
|
| 106 |
+
-0.23832830414175987,
|
| 107 |
+
-0.2097599548101425,
|
| 108 |
+
-0.9988620406389237,
|
| 109 |
+
-0.4039672353863716,
|
| 110 |
+
0.6080100274085999,
|
| 111 |
+
-0.19206354618072508,
|
| 112 |
+
-1.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.5033414244651794,
|
| 116 |
+
0.16928535521030416,
|
| 117 |
+
0.41566276580095285,
|
| 118 |
+
0.9899059218168258,
|
| 119 |
+
0.15462822496891018,
|
| 120 |
+
0.03764873944222882,
|
| 121 |
+
0.4657947558164549,
|
| 122 |
+
0.9995575082302094,
|
| 123 |
+
0.12326683558523567,
|
| 124 |
+
1.0,
|
| 125 |
+
0.4801343524456022,
|
| 126 |
+
0.1795493066310881,
|
| 127 |
+
0.4235989159345625,
|
| 128 |
+
0.9913575077056883,
|
| 129 |
+
0.5356137681007356,
|
| 130 |
+
0.044951977618036626,
|
| 131 |
+
0.7084567189216593,
|
| 132 |
+
0.9992782145738601,
|
| 133 |
+
0.6150667482614517,
|
| 134 |
+
1.0
|
| 135 |
+
],
|
| 136 |
+
"mask": [
|
| 137 |
+
true,
|
| 138 |
+
true,
|
| 139 |
+
true,
|
| 140 |
+
false,
|
| 141 |
+
false,
|
| 142 |
+
false,
|
| 143 |
+
false,
|
| 144 |
+
false,
|
| 145 |
+
false,
|
| 146 |
+
false,
|
| 147 |
+
true,
|
| 148 |
+
true,
|
| 149 |
+
true,
|
| 150 |
+
false,
|
| 151 |
+
false,
|
| 152 |
+
false,
|
| 153 |
+
false,
|
| 154 |
+
false,
|
| 155 |
+
false,
|
| 156 |
+
false
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
"proprio": {
|
| 160 |
+
"mean": [
|
| 161 |
+
0.33022773265838623,
|
| 162 |
+
-0.022152910009026527,
|
| 163 |
+
0.15129819512367249,
|
| 164 |
+
0.2713927626609802,
|
| 165 |
+
-0.12320420891046524,
|
| 166 |
+
-0.8610075116157532,
|
| 167 |
+
-0.11408627033233643,
|
| 168 |
+
0.939403772354126,
|
| 169 |
+
-0.15196889638900757,
|
| 170 |
+
0.565762460231781,
|
| 171 |
+
0.3260097801685333,
|
| 172 |
+
0.01288874913007021,
|
| 173 |
+
0.17190413177013397,
|
| 174 |
+
0.3495618999004364,
|
| 175 |
+
0.11379288882017136,
|
| 176 |
+
-0.8095750212669373,
|
| 177 |
+
0.032405924052000046,
|
| 178 |
+
0.9490125179290771,
|
| 179 |
+
0.14386717975139618,
|
| 180 |
+
0.7014499306678772
|
| 181 |
+
],
|
| 182 |
+
"std": [
|
| 183 |
+
0.0579490028321743,
|
| 184 |
+
0.07013536244630814,
|
| 185 |
+
0.09934848546981812,
|
| 186 |
+
0.3117230236530304,
|
| 187 |
+
0.13217779994010925,
|
| 188 |
+
0.23493210971355438,
|
| 189 |
+
0.2282971888780594,
|
| 190 |
+
0.0823960080742836,
|
| 191 |
+
0.1500341296195984,
|
| 192 |
+
0.8515227437019348,
|
| 193 |
+
0.04813272878527641,
|
| 194 |
+
0.06810668110847473,
|
| 195 |
+
0.10921779274940491,
|
| 196 |
+
0.34190261363983154,
|
| 197 |
+
0.146615669131279,
|
| 198 |
+
0.26654887199401855,
|
| 199 |
+
0.22192324697971344,
|
| 200 |
+
0.08192051947116852,
|
| 201 |
+
0.147200345993042,
|
| 202 |
+
0.7158040404319763
|
| 203 |
+
],
|
| 204 |
+
"max": [
|
| 205 |
+
0.559941291809082,
|
| 206 |
+
0.26086756587028503,
|
| 207 |
+
0.4504527747631073,
|
| 208 |
+
0.9999247789382935,
|
| 209 |
+
0.4198993146419525,
|
| 210 |
+
0.3512286841869354,
|
| 211 |
+
0.7522457242012024,
|
| 212 |
+
1.0,
|
| 213 |
+
0.8956095576286316,
|
| 214 |
+
1.470957636833191,
|
| 215 |
+
0.5437091588973999,
|
| 216 |
+
0.32627788186073303,
|
| 217 |
+
0.4945259988307953,
|
| 218 |
+
0.9998428821563721,
|
| 219 |
+
0.7737792730331421,
|
| 220 |
+
0.4633983373641968,
|
| 221 |
+
0.9018308520317078,
|
| 222 |
+
1.0,
|
| 223 |
+
0.9907073378562927,
|
| 224 |
+
1.361535668373108
|
| 225 |
+
],
|
| 226 |
+
"min": [
|
| 227 |
+
0.16681896150112152,
|
| 228 |
+
-0.20499344170093536,
|
| 229 |
+
-0.0030731656588613987,
|
| 230 |
+
-0.4872298836708069,
|
| 231 |
+
-0.6995252966880798,
|
| 232 |
+
-0.999997615814209,
|
| 233 |
+
-0.988165020942688,
|
| 234 |
+
0.14152538776397705,
|
| 235 |
+
-0.8483264446258545,
|
| 236 |
+
-1.2196638584136963,
|
| 237 |
+
0.14598572254180908,
|
| 238 |
+
-0.2277291864156723,
|
| 239 |
+
0.004666368011385202,
|
| 240 |
+
-0.5699886679649353,
|
| 241 |
+
-0.40678924322128296,
|
| 242 |
+
-0.9999999403953552,
|
| 243 |
+
-0.6972882151603699,
|
| 244 |
+
0.13462646305561066,
|
| 245 |
+
-0.643044650554657,
|
| 246 |
+
-1.164451003074646
|
| 247 |
+
],
|
| 248 |
+
"q01": [
|
| 249 |
+
0.2053149801492691,
|
| 250 |
+
-0.17586381256580352,
|
| 251 |
+
0.015469378884881736,
|
| 252 |
+
-0.2516648331284523,
|
| 253 |
+
-0.5193796420097351,
|
| 254 |
+
-0.9995058274269104,
|
| 255 |
+
-0.7092818850278855,
|
| 256 |
+
0.608681161403656,
|
| 257 |
+
-0.578884813785553,
|
| 258 |
+
-1.1618710005283355,
|
| 259 |
+
0.21638940930366515,
|
| 260 |
+
-0.1691040216386318,
|
| 261 |
+
0.011891756923869252,
|
| 262 |
+
-0.29012590169906616,
|
| 263 |
+
-0.20126488715410232,
|
| 264 |
+
-0.9995589327812194,
|
| 265 |
+
-0.49963704913854595,
|
| 266 |
+
0.533765652179718,
|
| 267 |
+
-0.18726778730750085,
|
| 268 |
+
-1.082753186225891
|
| 269 |
+
],
|
| 270 |
+
"q99": [
|
| 271 |
+
0.5071819436550137,
|
| 272 |
+
0.165744510143995,
|
| 273 |
+
0.40272374808788297,
|
| 274 |
+
0.995180070400238,
|
| 275 |
+
0.16266889929771197,
|
| 276 |
+
0.09040380395948588,
|
| 277 |
+
0.5001266032457347,
|
| 278 |
+
0.9997656464576721,
|
| 279 |
+
0.10759550034999843,
|
| 280 |
+
1.4176189756393425,
|
| 281 |
+
0.47452601760625834,
|
| 282 |
+
0.1839943121373646,
|
| 283 |
+
0.40895662158727647,
|
| 284 |
+
0.995180070400238,
|
| 285 |
+
0.5622373461723318,
|
| 286 |
+
0.07441098906099738,
|
| 287 |
+
0.7114433652162524,
|
| 288 |
+
0.999856880903244,
|
| 289 |
+
0.5974926966428754,
|
| 290 |
+
1.321595377922058
|
| 291 |
+
],
|
| 292 |
+
"mask": [
|
| 293 |
+
true,
|
| 294 |
+
true,
|
| 295 |
+
true,
|
| 296 |
+
false,
|
| 297 |
+
false,
|
| 298 |
+
false,
|
| 299 |
+
false,
|
| 300 |
+
false,
|
| 301 |
+
false,
|
| 302 |
+
false,
|
| 303 |
+
true,
|
| 304 |
+
true,
|
| 305 |
+
true,
|
| 306 |
+
false,
|
| 307 |
+
false,
|
| 308 |
+
false,
|
| 309 |
+
false,
|
| 310 |
+
false,
|
| 311 |
+
false,
|
| 312 |
+
false
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"num_transitions": 11572,
|
| 316 |
+
"num_trajectories": 50
|
| 317 |
+
},
|
| 318 |
+
"aloha_lift_box_new": {
|
| 319 |
+
"action": {
|
| 320 |
+
"mean": [
|
| 321 |
+
0.36333414912223816,
|
| 322 |
+
-0.018824385479092598,
|
| 323 |
+
0.17903447151184082,
|
| 324 |
+
0.40831902623176575,
|
| 325 |
+
-0.11689134687185287,
|
| 326 |
+
-0.8073355555534363,
|
| 327 |
+
-0.10573221743106842,
|
| 328 |
+
0.9415335059165955,
|
| 329 |
+
-0.1624741405248642,
|
| 330 |
+
0.619253396987915,
|
| 331 |
+
0.360858291387558,
|
| 332 |
+
0.013982057571411133,
|
| 333 |
+
0.20412704348564148,
|
| 334 |
+
0.5001598596572876,
|
| 335 |
+
0.1113751009106636,
|
| 336 |
+
-0.7415224313735962,
|
| 337 |
+
0.052125416696071625,
|
| 338 |
+
0.9483603239059448,
|
| 339 |
+
0.16254939138889313,
|
| 340 |
+
0.7590736150741577
|
| 341 |
+
],
|
| 342 |
+
"std": [
|
| 343 |
+
0.0638059601187706,
|
| 344 |
+
0.06317952275276184,
|
| 345 |
+
0.11073730885982513,
|
| 346 |
+
0.31736457347869873,
|
| 347 |
+
0.13928908109664917,
|
| 348 |
+
0.2184142768383026,
|
| 349 |
+
0.22394300997257233,
|
| 350 |
+
0.08008279651403427,
|
| 351 |
+
0.13921108841896057,
|
| 352 |
+
0.785220742225647,
|
| 353 |
+
0.05687877535820007,
|
| 354 |
+
0.059404969215393066,
|
| 355 |
+
0.1170634776353836,
|
| 356 |
+
0.3238433599472046,
|
| 357 |
+
0.14080215990543365,
|
| 358 |
+
0.2507486343383789,
|
| 359 |
+
0.21644321084022522,
|
| 360 |
+
0.07544828206300735,
|
| 361 |
+
0.1375824511051178,
|
| 362 |
+
0.6510355472564697
|
| 363 |
+
],
|
| 364 |
+
"max": [
|
| 365 |
+
0.5681452751159668,
|
| 366 |
+
0.2437673658132553,
|
| 367 |
+
0.45541316270828247,
|
| 368 |
+
0.9999293088912964,
|
| 369 |
+
0.523757815361023,
|
| 370 |
+
0.4592168927192688,
|
| 371 |
+
0.7756927013397217,
|
| 372 |
+
0.9999935030937195,
|
| 373 |
+
0.2805824279785156,
|
| 374 |
+
1.0,
|
| 375 |
+
0.5600330233573914,
|
| 376 |
+
0.3342031240463257,
|
| 377 |
+
0.4682213366031647,
|
| 378 |
+
0.9998393058776855,
|
| 379 |
+
0.7949740886688232,
|
| 380 |
+
0.1664249449968338,
|
| 381 |
+
0.9131186604499817,
|
| 382 |
+
0.9999967813491821,
|
| 383 |
+
0.7936055064201355,
|
| 384 |
+
1.0
|
| 385 |
+
],
|
| 386 |
+
"min": [
|
| 387 |
+
0.1517709195613861,
|
| 388 |
+
-0.2900286316871643,
|
| 389 |
+
-0.07412093877792358,
|
| 390 |
+
-0.4022133946418762,
|
| 391 |
+
-0.7361933588981628,
|
| 392 |
+
-0.9999988079071045,
|
| 393 |
+
-0.9935019016265869,
|
| 394 |
+
0.10709662735462189,
|
| 395 |
+
-0.8023554682731628,
|
| 396 |
+
-1.0,
|
| 397 |
+
0.15366072952747345,
|
| 398 |
+
-0.23686714470386505,
|
| 399 |
+
0.0008372184820473194,
|
| 400 |
+
-0.5509981513023376,
|
| 401 |
+
-0.35234102606773376,
|
| 402 |
+
-0.999956488609314,
|
| 403 |
+
-0.5318384766578674,
|
| 404 |
+
0.3388061225414276,
|
| 405 |
+
-0.27330997586250305,
|
| 406 |
+
-1.0
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
0.21054682105779648,
|
| 410 |
+
-0.1866426882147789,
|
| 411 |
+
0.008138886513188495,
|
| 412 |
+
-0.19710821226239203,
|
| 413 |
+
-0.5368945515155792,
|
| 414 |
+
-0.9981186389923096,
|
| 415 |
+
-0.6956261324882507,
|
| 416 |
+
0.6267582887411117,
|
| 417 |
+
-0.5600040704011917,
|
| 418 |
+
-1.0,
|
| 419 |
+
0.2190245844423771,
|
| 420 |
+
-0.15968348175287247,
|
| 421 |
+
0.025033411756157874,
|
| 422 |
+
-0.23832830414175987,
|
| 423 |
+
-0.2097599548101425,
|
| 424 |
+
-0.9988620406389237,
|
| 425 |
+
-0.4039672353863716,
|
| 426 |
+
0.6080100274085999,
|
| 427 |
+
-0.19206354618072508,
|
| 428 |
+
-1.0
|
| 429 |
+
],
|
| 430 |
+
"q99": [
|
| 431 |
+
0.5033414244651794,
|
| 432 |
+
0.16928535521030416,
|
| 433 |
+
0.41566276580095285,
|
| 434 |
+
0.9899059218168258,
|
| 435 |
+
0.15462822496891018,
|
| 436 |
+
0.03764873944222882,
|
| 437 |
+
0.4657947558164549,
|
| 438 |
+
0.9995575082302094,
|
| 439 |
+
0.12326683558523567,
|
| 440 |
+
1.0,
|
| 441 |
+
0.4801343524456022,
|
| 442 |
+
0.1795493066310881,
|
| 443 |
+
0.4235989159345625,
|
| 444 |
+
0.9913575077056883,
|
| 445 |
+
0.5356137681007356,
|
| 446 |
+
0.044951977618036626,
|
| 447 |
+
0.7084567189216593,
|
| 448 |
+
0.9992782145738601,
|
| 449 |
+
0.6150667482614517,
|
| 450 |
+
1.0
|
| 451 |
+
],
|
| 452 |
+
"mask": [
|
| 453 |
+
true,
|
| 454 |
+
true,
|
| 455 |
+
true,
|
| 456 |
+
false,
|
| 457 |
+
false,
|
| 458 |
+
false,
|
| 459 |
+
false,
|
| 460 |
+
false,
|
| 461 |
+
false,
|
| 462 |
+
false,
|
| 463 |
+
true,
|
| 464 |
+
true,
|
| 465 |
+
true,
|
| 466 |
+
false,
|
| 467 |
+
false,
|
| 468 |
+
false,
|
| 469 |
+
false,
|
| 470 |
+
false,
|
| 471 |
+
false,
|
| 472 |
+
false
|
| 473 |
+
]
|
| 474 |
+
},
|
| 475 |
+
"proprio": {
|
| 476 |
+
"mean": [
|
| 477 |
+
0.33022773265838623,
|
| 478 |
+
-0.022152910009026527,
|
| 479 |
+
0.15129819512367249,
|
| 480 |
+
0.2713927626609802,
|
| 481 |
+
-0.12320420891046524,
|
| 482 |
+
-0.8610075116157532,
|
| 483 |
+
-0.11408627033233643,
|
| 484 |
+
0.939403772354126,
|
| 485 |
+
-0.15196889638900757,
|
| 486 |
+
0.565762460231781,
|
| 487 |
+
0.3260097801685333,
|
| 488 |
+
0.01288874913007021,
|
| 489 |
+
0.17190413177013397,
|
| 490 |
+
0.3495618999004364,
|
| 491 |
+
0.11379288882017136,
|
| 492 |
+
-0.8095750212669373,
|
| 493 |
+
0.032405924052000046,
|
| 494 |
+
0.9490125179290771,
|
| 495 |
+
0.14386717975139618,
|
| 496 |
+
0.7014499306678772
|
| 497 |
+
],
|
| 498 |
+
"std": [
|
| 499 |
+
0.0579490028321743,
|
| 500 |
+
0.07013536244630814,
|
| 501 |
+
0.09934848546981812,
|
| 502 |
+
0.3117230236530304,
|
| 503 |
+
0.13217779994010925,
|
| 504 |
+
0.23493210971355438,
|
| 505 |
+
0.2282971888780594,
|
| 506 |
+
0.0823960080742836,
|
| 507 |
+
0.1500341296195984,
|
| 508 |
+
0.8515227437019348,
|
| 509 |
+
0.04813272878527641,
|
| 510 |
+
0.06810668110847473,
|
| 511 |
+
0.10921779274940491,
|
| 512 |
+
0.34190261363983154,
|
| 513 |
+
0.146615669131279,
|
| 514 |
+
0.26654887199401855,
|
| 515 |
+
0.22192324697971344,
|
| 516 |
+
0.08192051947116852,
|
| 517 |
+
0.147200345993042,
|
| 518 |
+
0.7158040404319763
|
| 519 |
+
],
|
| 520 |
+
"max": [
|
| 521 |
+
0.559941291809082,
|
| 522 |
+
0.26086756587028503,
|
| 523 |
+
0.4504527747631073,
|
| 524 |
+
0.9999247789382935,
|
| 525 |
+
0.4198993146419525,
|
| 526 |
+
0.3512286841869354,
|
| 527 |
+
0.7522457242012024,
|
| 528 |
+
1.0,
|
| 529 |
+
0.8956095576286316,
|
| 530 |
+
1.470957636833191,
|
| 531 |
+
0.5437091588973999,
|
| 532 |
+
0.32627788186073303,
|
| 533 |
+
0.4945259988307953,
|
| 534 |
+
0.9998428821563721,
|
| 535 |
+
0.7737792730331421,
|
| 536 |
+
0.4633983373641968,
|
| 537 |
+
0.9018308520317078,
|
| 538 |
+
1.0,
|
| 539 |
+
0.9907073378562927,
|
| 540 |
+
1.361535668373108
|
| 541 |
+
],
|
| 542 |
+
"min": [
|
| 543 |
+
0.16681896150112152,
|
| 544 |
+
-0.20499344170093536,
|
| 545 |
+
-0.0030731656588613987,
|
| 546 |
+
-0.4872298836708069,
|
| 547 |
+
-0.6995252966880798,
|
| 548 |
+
-0.999997615814209,
|
| 549 |
+
-0.988165020942688,
|
| 550 |
+
0.14152538776397705,
|
| 551 |
+
-0.8483264446258545,
|
| 552 |
+
-1.2196638584136963,
|
| 553 |
+
0.14598572254180908,
|
| 554 |
+
-0.2277291864156723,
|
| 555 |
+
0.004666368011385202,
|
| 556 |
+
-0.5699886679649353,
|
| 557 |
+
-0.40678924322128296,
|
| 558 |
+
-0.9999999403953552,
|
| 559 |
+
-0.6972882151603699,
|
| 560 |
+
0.13462646305561066,
|
| 561 |
+
-0.643044650554657,
|
| 562 |
+
-1.164451003074646
|
| 563 |
+
],
|
| 564 |
+
"q01": [
|
| 565 |
+
0.2053149801492691,
|
| 566 |
+
-0.17586381256580352,
|
| 567 |
+
0.015469378884881736,
|
| 568 |
+
-0.2516648331284523,
|
| 569 |
+
-0.5193796420097351,
|
| 570 |
+
-0.9995058274269104,
|
| 571 |
+
-0.7092818850278855,
|
| 572 |
+
0.608681161403656,
|
| 573 |
+
-0.578884813785553,
|
| 574 |
+
-1.1618710005283355,
|
| 575 |
+
0.21638940930366515,
|
| 576 |
+
-0.1691040216386318,
|
| 577 |
+
0.011891756923869252,
|
| 578 |
+
-0.29012590169906616,
|
| 579 |
+
-0.20126488715410232,
|
| 580 |
+
-0.9995589327812194,
|
| 581 |
+
-0.49963704913854595,
|
| 582 |
+
0.533765652179718,
|
| 583 |
+
-0.18726778730750085,
|
| 584 |
+
-1.082753186225891
|
| 585 |
+
],
|
| 586 |
+
"q99": [
|
| 587 |
+
0.5071819436550137,
|
| 588 |
+
0.165744510143995,
|
| 589 |
+
0.40272374808788297,
|
| 590 |
+
0.995180070400238,
|
| 591 |
+
0.16266889929771197,
|
| 592 |
+
0.09040380395948588,
|
| 593 |
+
0.5001266032457347,
|
| 594 |
+
0.9997656464576721,
|
| 595 |
+
0.10759550034999843,
|
| 596 |
+
1.4176189756393425,
|
| 597 |
+
0.47452601760625834,
|
| 598 |
+
0.1839943121373646,
|
| 599 |
+
0.40895662158727647,
|
| 600 |
+
0.995180070400238,
|
| 601 |
+
0.5622373461723318,
|
| 602 |
+
0.07441098906099738,
|
| 603 |
+
0.7114433652162524,
|
| 604 |
+
0.999856880903244,
|
| 605 |
+
0.5974926966428754,
|
| 606 |
+
1.321595377922058
|
| 607 |
+
],
|
| 608 |
+
"mask": [
|
| 609 |
+
true,
|
| 610 |
+
true,
|
| 611 |
+
true,
|
| 612 |
+
false,
|
| 613 |
+
false,
|
| 614 |
+
false,
|
| 615 |
+
false,
|
| 616 |
+
false,
|
| 617 |
+
false,
|
| 618 |
+
false,
|
| 619 |
+
true,
|
| 620 |
+
true,
|
| 621 |
+
true,
|
| 622 |
+
false,
|
| 623 |
+
false,
|
| 624 |
+
false,
|
| 625 |
+
false,
|
| 626 |
+
false,
|
| 627 |
+
false,
|
| 628 |
+
false
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
"num_transitions": 11572,
|
| 632 |
+
"num_trajectories": 50
|
| 633 |
+
}
|
| 634 |
+
}
|
2e-5/twinvla-scratch-aloha_lift_box/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5291b6de1ebdbaf84f2a9addb3ee36fb3e83050b1196984e4534ece412e74aab
|
| 3 |
+
size 2889536104
|
2e-5/twinvla-scratch-aloha_lift_box/singlevla_config/config.json
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_commit_hash": null,
|
| 3 |
+
"_name_or_path": "/data5/jellyho/twinvla-checkpoints/Eagle2_1B-Scratch-DiT-B",
|
| 4 |
+
"action_dim": 10,
|
| 5 |
+
"action_head": "DiT",
|
| 6 |
+
"action_head_hidden_dim": 1024,
|
| 7 |
+
"action_len": 20,
|
| 8 |
+
"aggregation": "None",
|
| 9 |
+
"architectures": [
|
| 10 |
+
"Eagle2_1BVLA"
|
| 11 |
+
],
|
| 12 |
+
"auto_map": {},
|
| 13 |
+
"denoiser": "FM",
|
| 14 |
+
"diffusion_batch": 32,
|
| 15 |
+
"dit_size": "DiT-B",
|
| 16 |
+
"downsample_ratio": 0.5,
|
| 17 |
+
"dynamic_image_size": true,
|
| 18 |
+
"efficient_loss": true,
|
| 19 |
+
"enable_cfg": true,
|
| 20 |
+
"force_image_size": 448,
|
| 21 |
+
"global_normalization": true,
|
| 22 |
+
"image_size": 448,
|
| 23 |
+
"keep_aspect_ratio": false,
|
| 24 |
+
"knowledge_insulation": false,
|
| 25 |
+
"llm_config": {
|
| 26 |
+
"_attn_implementation_autoset": true,
|
| 27 |
+
"_name_or_path": "./pretrained/Qwen2_5-0_5B-Instruct",
|
| 28 |
+
"add_cross_attention": false,
|
| 29 |
+
"architectures": [
|
| 30 |
+
"Qwen2ForCausalLM"
|
| 31 |
+
],
|
| 32 |
+
"attention_dropout": 0.0,
|
| 33 |
+
"auto_map": {
|
| 34 |
+
"AutoConfig": "configuration_qwen2.Qwen2Config",
|
| 35 |
+
"AutoModel": "modeling_qwen2.Qwen2Model",
|
| 36 |
+
"AutoModelForCausalLM": "modeling_qwen2.Qwen2ForCausalLM"
|
| 37 |
+
},
|
| 38 |
+
"bad_words_ids": null,
|
| 39 |
+
"begin_suppress_tokens": null,
|
| 40 |
+
"bos_token_id": 151643,
|
| 41 |
+
"chunk_size_feed_forward": 0,
|
| 42 |
+
"cross_attention_hidden_size": null,
|
| 43 |
+
"decoder_start_token_id": null,
|
| 44 |
+
"diversity_penalty": 0.0,
|
| 45 |
+
"do_sample": false,
|
| 46 |
+
"early_stopping": false,
|
| 47 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 48 |
+
"eos_token_id": 151645,
|
| 49 |
+
"exponential_decay_length_penalty": null,
|
| 50 |
+
"finetuning_task": null,
|
| 51 |
+
"forced_bos_token_id": null,
|
| 52 |
+
"forced_eos_token_id": null,
|
| 53 |
+
"hidden_act": "silu",
|
| 54 |
+
"hidden_size": 896,
|
| 55 |
+
"id2label": {
|
| 56 |
+
"0": "LABEL_0",
|
| 57 |
+
"1": "LABEL_1"
|
| 58 |
+
},
|
| 59 |
+
"initializer_range": 0.02,
|
| 60 |
+
"intermediate_size": 4864,
|
| 61 |
+
"is_decoder": false,
|
| 62 |
+
"is_encoder_decoder": false,
|
| 63 |
+
"label2id": {
|
| 64 |
+
"LABEL_0": 0,
|
| 65 |
+
"LABEL_1": 1
|
| 66 |
+
},
|
| 67 |
+
"length_penalty": 1.0,
|
| 68 |
+
"max_length": 20,
|
| 69 |
+
"max_position_embeddings": 32768,
|
| 70 |
+
"max_window_layers": 21,
|
| 71 |
+
"min_length": 0,
|
| 72 |
+
"model_type": "qwen2",
|
| 73 |
+
"no_repeat_ngram_size": 0,
|
| 74 |
+
"num_attention_heads": 14,
|
| 75 |
+
"num_beam_groups": 1,
|
| 76 |
+
"num_beams": 1,
|
| 77 |
+
"num_hidden_layers": 24,
|
| 78 |
+
"num_key_value_heads": 2,
|
| 79 |
+
"num_return_sequences": 1,
|
| 80 |
+
"output_attentions": false,
|
| 81 |
+
"output_hidden_states": false,
|
| 82 |
+
"output_scores": false,
|
| 83 |
+
"pad_token_id": null,
|
| 84 |
+
"prefix": null,
|
| 85 |
+
"problem_type": null,
|
| 86 |
+
"pruned_heads": {},
|
| 87 |
+
"remove_invalid_values": false,
|
| 88 |
+
"repetition_penalty": 1.0,
|
| 89 |
+
"return_dict": true,
|
| 90 |
+
"return_dict_in_generate": false,
|
| 91 |
+
"rms_norm_eps": 1e-06,
|
| 92 |
+
"rope_scaling": null,
|
| 93 |
+
"rope_theta": 1000000.0,
|
| 94 |
+
"sep_token_id": null,
|
| 95 |
+
"sliding_window": 32768,
|
| 96 |
+
"suppress_tokens": null,
|
| 97 |
+
"task_specific_params": null,
|
| 98 |
+
"temperature": 1.0,
|
| 99 |
+
"tf_legacy_loss": false,
|
| 100 |
+
"tie_encoder_decoder": false,
|
| 101 |
+
"tie_word_embeddings": true,
|
| 102 |
+
"tokenizer_class": null,
|
| 103 |
+
"top_k": 50,
|
| 104 |
+
"top_p": 1.0,
|
| 105 |
+
"torch_dtype": "bfloat16",
|
| 106 |
+
"torchscript": false,
|
| 107 |
+
"transformers_version": "4.50.0.dev0",
|
| 108 |
+
"typical_p": 1.0,
|
| 109 |
+
"use_bfloat16": false,
|
| 110 |
+
"use_cache": false,
|
| 111 |
+
"use_sliding_window": false,
|
| 112 |
+
"vocab_size": 151674
|
| 113 |
+
},
|
| 114 |
+
"loss_version": "v4",
|
| 115 |
+
"max_dynamic_patch": 12,
|
| 116 |
+
"min_dynamic_patch": 1,
|
| 117 |
+
"mlp_checkpoint": true,
|
| 118 |
+
"model_path": "nvidia/Eagle2-1B",
|
| 119 |
+
"model_type": "Eagle2_1BVLA",
|
| 120 |
+
"modeling": "denoising",
|
| 121 |
+
"normalization": "quantile",
|
| 122 |
+
"num_readouts": 1,
|
| 123 |
+
"pad2square": false,
|
| 124 |
+
"pre_feature_reduction": false,
|
| 125 |
+
"ps_version": "v2",
|
| 126 |
+
"readout_token_as_eos": true,
|
| 127 |
+
"return_text": null,
|
| 128 |
+
"select_layer": -1,
|
| 129 |
+
"state_dim": 10,
|
| 130 |
+
"stopping_token": "|",
|
| 131 |
+
"template": "qwen2-chat",
|
| 132 |
+
"test_denoising_steps": 10,
|
| 133 |
+
"torch_dtype": "bfloat16",
|
| 134 |
+
"train_denoising_steps": 100,
|
| 135 |
+
"transformers_version": null,
|
| 136 |
+
"use_backbone_lora": 0,
|
| 137 |
+
"use_llm_lora": 0,
|
| 138 |
+
"use_thumbnail": true,
|
| 139 |
+
"vision_config": {
|
| 140 |
+
"_attn_implementation_autoset": true,
|
| 141 |
+
"_name_or_path": "",
|
| 142 |
+
"add_cross_attention": false,
|
| 143 |
+
"architectures": [
|
| 144 |
+
"SiglipVisionModel"
|
| 145 |
+
],
|
| 146 |
+
"attention_dropout": 0.0,
|
| 147 |
+
"auto_map": {
|
| 148 |
+
"AutoConfig": "configuration_siglip.SiglipVisionConfig",
|
| 149 |
+
"AutoModel": "modeling_siglip.SiglipVisionModel"
|
| 150 |
+
},
|
| 151 |
+
"bad_words_ids": null,
|
| 152 |
+
"begin_suppress_tokens": null,
|
| 153 |
+
"bos_token_id": null,
|
| 154 |
+
"chunk_size_feed_forward": 0,
|
| 155 |
+
"cross_attention_hidden_size": null,
|
| 156 |
+
"decoder_start_token_id": null,
|
| 157 |
+
"diversity_penalty": 0.0,
|
| 158 |
+
"do_sample": false,
|
| 159 |
+
"drop_path_rate": 0.1,
|
| 160 |
+
"early_stopping": false,
|
| 161 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 162 |
+
"eos_token_id": null,
|
| 163 |
+
"exponential_decay_length_penalty": null,
|
| 164 |
+
"finetuning_task": null,
|
| 165 |
+
"forced_bos_token_id": null,
|
| 166 |
+
"forced_eos_token_id": null,
|
| 167 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 168 |
+
"hidden_size": 1152,
|
| 169 |
+
"id2label": {
|
| 170 |
+
"0": "LABEL_0",
|
| 171 |
+
"1": "LABEL_1"
|
| 172 |
+
},
|
| 173 |
+
"image_size": 448,
|
| 174 |
+
"intermediate_size": 4304,
|
| 175 |
+
"is_decoder": false,
|
| 176 |
+
"is_encoder_decoder": false,
|
| 177 |
+
"label2id": {
|
| 178 |
+
"LABEL_0": 0,
|
| 179 |
+
"LABEL_1": 1
|
| 180 |
+
},
|
| 181 |
+
"layer_norm_eps": 1e-06,
|
| 182 |
+
"length_penalty": 1.0,
|
| 183 |
+
"max_length": 20,
|
| 184 |
+
"min_length": 0,
|
| 185 |
+
"model_type": "siglip_vision_model",
|
| 186 |
+
"no_repeat_ngram_size": 0,
|
| 187 |
+
"num_attention_heads": 16,
|
| 188 |
+
"num_beam_groups": 1,
|
| 189 |
+
"num_beams": 1,
|
| 190 |
+
"num_channels": 3,
|
| 191 |
+
"num_hidden_layers": 27,
|
| 192 |
+
"num_image_tokens": 1024,
|
| 193 |
+
"num_return_sequences": 1,
|
| 194 |
+
"output_attentions": false,
|
| 195 |
+
"output_hidden_states": false,
|
| 196 |
+
"output_scores": false,
|
| 197 |
+
"pad_token_id": null,
|
| 198 |
+
"patch_size": 14,
|
| 199 |
+
"prefix": null,
|
| 200 |
+
"problem_type": null,
|
| 201 |
+
"projection_dim": 2048,
|
| 202 |
+
"projector_hidden_act": "gelu_fast",
|
| 203 |
+
"pruned_heads": {},
|
| 204 |
+
"remove_invalid_values": false,
|
| 205 |
+
"repetition_penalty": 1.0,
|
| 206 |
+
"return_dict": true,
|
| 207 |
+
"return_dict_in_generate": false,
|
| 208 |
+
"sep_token_id": null,
|
| 209 |
+
"suppress_tokens": null,
|
| 210 |
+
"task_specific_params": null,
|
| 211 |
+
"temperature": 1.0,
|
| 212 |
+
"tf_legacy_loss": false,
|
| 213 |
+
"tie_encoder_decoder": false,
|
| 214 |
+
"tie_word_embeddings": true,
|
| 215 |
+
"tokenizer_class": null,
|
| 216 |
+
"top_k": 50,
|
| 217 |
+
"top_p": 1.0,
|
| 218 |
+
"torch_dtype": "bfloat16",
|
| 219 |
+
"torchscript": false,
|
| 220 |
+
"transformers_version": "4.50.0.dev0",
|
| 221 |
+
"typical_p": 1.0,
|
| 222 |
+
"use_bfloat16": false,
|
| 223 |
+
"vision_use_head": false
|
| 224 |
+
},
|
| 225 |
+
"vocab_size": 151674,
|
| 226 |
+
"vocab_start": null
|
| 227 |
+
}
|
2e-5/twinvla-scratch-aloha_lift_box/training_states.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c17b8c72a7a6ef1706774e683a3696ca8fbcfeaec772981c175ccce137870553
|
| 3 |
+
size 4126124658
|