Hoshipu commited on
Commit
ef1a872
·
verified ·
1 Parent(s): cc5b417

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,1386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/shared_work/markhsp/checkpoints/X-VLA-Pt",
3
+ "action_mode": "auto",
4
+ "architectures": [
5
+ "XVLA"
6
+ ],
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_xvla.XVLAConfig",
9
+ "AutoModel": "modeling_xvla.XVLA"
10
+ },
11
+ "depth": 24,
12
+ "dim_time": 32,
13
+ "florence_config": {
14
+ "_attn_implementation_autoset": true,
15
+ "bos_token_id": 0,
16
+ "eos_token_id": 2,
17
+ "is_encoder_decoder": true,
18
+ "model_type": "florence2",
19
+ "pad_token_id": 1,
20
+ "text_config": {
21
+ "_attn_implementation_autoset": true,
22
+ "_name_or_path": "",
23
+ "activation_dropout": 0.1,
24
+ "activation_function": "gelu",
25
+ "add_bias_logits": false,
26
+ "add_cross_attention": false,
27
+ "add_final_layer_norm": false,
28
+ "architectures": null,
29
+ "attention_dropout": 0.1,
30
+ "bad_words_ids": null,
31
+ "begin_suppress_tokens": null,
32
+ "bos_token_id": 0,
33
+ "chunk_size_feed_forward": 0,
34
+ "classif_dropout": 0.1,
35
+ "classifier_dropout": 0.0,
36
+ "cross_attention_hidden_size": null,
37
+ "d_model": 1024,
38
+ "decoder_attention_heads": 16,
39
+ "decoder_ffn_dim": 4096,
40
+ "decoder_layerdrop": 0.0,
41
+ "decoder_layers": 12,
42
+ "decoder_start_token_id": 2,
43
+ "diversity_penalty": 0.0,
44
+ "do_sample": false,
45
+ "dropout": 0.1,
46
+ "early_stopping": true,
47
+ "encoder_attention_heads": 16,
48
+ "encoder_ffn_dim": 4096,
49
+ "encoder_layerdrop": 0.0,
50
+ "encoder_layers": 12,
51
+ "encoder_no_repeat_ngram_size": 0,
52
+ "eos_token_id": 2,
53
+ "exponential_decay_length_penalty": null,
54
+ "finetuning_task": null,
55
+ "forced_bos_token_id": 0,
56
+ "forced_eos_token_id": 2,
57
+ "gradient_checkpointing": false,
58
+ "id2label": {
59
+ "0": "LABEL_0",
60
+ "1": "LABEL_1",
61
+ "2": "LABEL_2"
62
+ },
63
+ "init_std": 0.02,
64
+ "is_decoder": false,
65
+ "is_encoder_decoder": true,
66
+ "label2id": {
67
+ "LABEL_0": 0,
68
+ "LABEL_1": 1,
69
+ "LABEL_2": 2
70
+ },
71
+ "length_penalty": 1.0,
72
+ "max_length": 20,
73
+ "max_position_embeddings": 4096,
74
+ "min_length": 0,
75
+ "model_type": "florence2_language",
76
+ "no_repeat_ngram_size": 3,
77
+ "normalize_before": false,
78
+ "num_beam_groups": 1,
79
+ "num_beams": 3,
80
+ "num_hidden_layers": 12,
81
+ "num_return_sequences": 1,
82
+ "output_attentions": false,
83
+ "output_hidden_states": false,
84
+ "output_scores": false,
85
+ "pad_token_id": 1,
86
+ "prefix": null,
87
+ "problem_type": null,
88
+ "pruned_heads": {},
89
+ "remove_invalid_values": false,
90
+ "repetition_penalty": 1.0,
91
+ "return_dict": true,
92
+ "return_dict_in_generate": false,
93
+ "scale_embedding": false,
94
+ "sep_token_id": null,
95
+ "suppress_tokens": null,
96
+ "task_specific_params": null,
97
+ "temperature": 1.0,
98
+ "tf_legacy_loss": false,
99
+ "tie_encoder_decoder": false,
100
+ "tie_word_embeddings": true,
101
+ "tokenizer_class": null,
102
+ "top_k": 50,
103
+ "top_p": 1.0,
104
+ "torch_dtype": null,
105
+ "torchscript": false,
106
+ "typical_p": 1.0,
107
+ "use_bfloat16": false,
108
+ "use_cache": true,
109
+ "vocab_size": 51289
110
+ },
111
+ "torch_dtype": "float32",
112
+ "vision_config": {
113
+ "_attn_implementation_autoset": false,
114
+ "_name_or_path": "",
115
+ "add_cross_attention": false,
116
+ "architectures": null,
117
+ "bad_words_ids": null,
118
+ "begin_suppress_tokens": null,
119
+ "bos_token_id": null,
120
+ "chunk_size_feed_forward": 0,
121
+ "cross_attention_hidden_size": null,
122
+ "decoder_start_token_id": null,
123
+ "depths": [
124
+ 1,
125
+ 1,
126
+ 9,
127
+ 1
128
+ ],
129
+ "dim_embed": [
130
+ 256,
131
+ 512,
132
+ 1024,
133
+ 2048
134
+ ],
135
+ "diversity_penalty": 0.0,
136
+ "do_sample": false,
137
+ "drop_path_rate": 0.1,
138
+ "early_stopping": false,
139
+ "enable_checkpoint": false,
140
+ "encoder_no_repeat_ngram_size": 0,
141
+ "eos_token_id": null,
142
+ "exponential_decay_length_penalty": null,
143
+ "finetuning_task": null,
144
+ "forced_bos_token_id": null,
145
+ "forced_eos_token_id": null,
146
+ "id2label": {
147
+ "0": "LABEL_0",
148
+ "1": "LABEL_1"
149
+ },
150
+ "image_feature_source": [
151
+ "spatial_avg_pool",
152
+ "temporal_avg_pool"
153
+ ],
154
+ "image_pos_embed": {
155
+ "max_pos_embeddings": 50,
156
+ "type": "learned_abs_2d"
157
+ },
158
+ "is_decoder": false,
159
+ "is_encoder_decoder": false,
160
+ "label2id": {
161
+ "LABEL_0": 0,
162
+ "LABEL_1": 1
163
+ },
164
+ "length_penalty": 1.0,
165
+ "max_length": 20,
166
+ "min_length": 0,
167
+ "model_type": "davit",
168
+ "no_repeat_ngram_size": 0,
169
+ "num_beam_groups": 1,
170
+ "num_beams": 1,
171
+ "num_groups": [
172
+ 8,
173
+ 16,
174
+ 32,
175
+ 64
176
+ ],
177
+ "num_heads": [
178
+ 8,
179
+ 16,
180
+ 32,
181
+ 64
182
+ ],
183
+ "num_return_sequences": 1,
184
+ "output_attentions": false,
185
+ "output_hidden_states": false,
186
+ "output_scores": false,
187
+ "pad_token_id": null,
188
+ "patch_padding": [
189
+ 3,
190
+ 1,
191
+ 1,
192
+ 1
193
+ ],
194
+ "patch_prenorm": [
195
+ false,
196
+ true,
197
+ true,
198
+ true
199
+ ],
200
+ "patch_size": [
201
+ 7,
202
+ 3,
203
+ 3,
204
+ 3
205
+ ],
206
+ "patch_stride": [
207
+ 4,
208
+ 2,
209
+ 2,
210
+ 2
211
+ ],
212
+ "prefix": null,
213
+ "problem_type": null,
214
+ "projection_dim": 1024,
215
+ "pruned_heads": {},
216
+ "remove_invalid_values": false,
217
+ "repetition_penalty": 1.0,
218
+ "return_dict": true,
219
+ "return_dict_in_generate": false,
220
+ "sep_token_id": null,
221
+ "suppress_tokens": null,
222
+ "task_specific_params": null,
223
+ "temperature": 1.0,
224
+ "tf_legacy_loss": false,
225
+ "tie_encoder_decoder": false,
226
+ "tie_word_embeddings": true,
227
+ "tokenizer_class": null,
228
+ "top_k": 50,
229
+ "top_p": 1.0,
230
+ "torch_dtype": null,
231
+ "torchscript": false,
232
+ "typical_p": 1.0,
233
+ "use_bfloat16": false,
234
+ "visual_temporal_embedding": {
235
+ "max_temporal_embeddings": 100,
236
+ "type": "COSINE"
237
+ },
238
+ "window_size": 12
239
+ }
240
+ },
241
+ "hidden_size": 1024,
242
+ "len_soft_prompts": 32,
243
+ "max_action_dim": 23,
244
+ "max_len_seq": 512,
245
+ "mlp_ratio": 4.0,
246
+ "model_type": "xvla",
247
+ "num_actions": 30,
248
+ "num_domains": 30,
249
+ "num_heads": 16,
250
+ "num_objects": 172,
251
+ "num_skills": 34,
252
+ "num_tasks": 50,
253
+ "object_class_weights": [
254
+ 0.4501,
255
+ 0.4205,
256
+ 0.3228,
257
+ 0.3057,
258
+ 0.3521,
259
+ 7.3765,
260
+ 0.2465,
261
+ 0.5885,
262
+ 0.4622,
263
+ 0.3009,
264
+ 0.281,
265
+ 0.3189,
266
+ 0.1221,
267
+ 0.4479,
268
+ 0.312,
269
+ 0.4966,
270
+ 0.3079,
271
+ 0.2972,
272
+ 0.5216,
273
+ 0.1139,
274
+ 0.2091,
275
+ 0.2318,
276
+ 0.1754,
277
+ 0.2122,
278
+ 0.3788,
279
+ 0.2522,
280
+ 0.2869,
281
+ 0.2588,
282
+ 0.5604,
283
+ 0.2401,
284
+ 0.4887,
285
+ 0.2479,
286
+ 0.4339,
287
+ 0.2858,
288
+ 0.4104,
289
+ 0.1655,
290
+ 0.5658,
291
+ 0.1832,
292
+ 0.4361,
293
+ 0.3443,
294
+ 0.4715,
295
+ 0.4035,
296
+ 0.3555,
297
+ 0.5542,
298
+ 0.5411,
299
+ 0.3756,
300
+ 0.6119,
301
+ 0.3592,
302
+ 0.3795,
303
+ 0.17,
304
+ 0.6553,
305
+ 1.8098,
306
+ 0.2901,
307
+ 0.2248,
308
+ 0.2027,
309
+ 0.4599,
310
+ 0.5536,
311
+ 0.508,
312
+ 0.2674,
313
+ 0.5678,
314
+ 0.3297,
315
+ 0.4523,
316
+ 0.2186,
317
+ 0.2029,
318
+ 0.4368,
319
+ 0.432,
320
+ 0.0822,
321
+ 0.3718,
322
+ 0.6172,
323
+ 0.3748,
324
+ 0.2685,
325
+ 0.2913,
326
+ 0.4405,
327
+ 0.3391,
328
+ 0.3676,
329
+ 6.0925,
330
+ 2.0803,
331
+ 2.153,
332
+ 2.2968,
333
+ 2.1286,
334
+ 12.6916,
335
+ 0.5856,
336
+ 5.4459,
337
+ 2.6565,
338
+ 13.1846,
339
+ 23.4236,
340
+ 0.478,
341
+ 0.462,
342
+ 0.1174,
343
+ 0.4406,
344
+ 0.2615,
345
+ 0.2668,
346
+ 0.2285,
347
+ 0.3084,
348
+ 0.2523,
349
+ 0.3026,
350
+ 0.2844,
351
+ 0.5057,
352
+ 0.343,
353
+ 0.2129,
354
+ 0.2497,
355
+ 0.3409,
356
+ 0.3574,
357
+ 0.3983,
358
+ 0.1783,
359
+ 0.2182,
360
+ 0.3948,
361
+ 3.4148,
362
+ 0.3134,
363
+ 0.4798,
364
+ 0.3458,
365
+ 0.2877,
366
+ 0.4385,
367
+ 0.2716,
368
+ 0.4203,
369
+ 0.2383,
370
+ 0.2431,
371
+ 0.2523,
372
+ 0.4795,
373
+ 0.4197,
374
+ 0.2102,
375
+ 0.15,
376
+ 0.3516,
377
+ 1.1414,
378
+ 0.1527,
379
+ 0.4403,
380
+ 0.188,
381
+ 0.5024,
382
+ 0.367,
383
+ 0.4584,
384
+ 0.293,
385
+ 0.3649,
386
+ 23.0659,
387
+ 0.1816,
388
+ 0.4641,
389
+ 0.438,
390
+ 0.2461,
391
+ 2.3487,
392
+ 0.3496,
393
+ 0.405,
394
+ 0.3314,
395
+ 0.489,
396
+ 0.4397,
397
+ 0.4996,
398
+ 0.1229,
399
+ 3.6319,
400
+ 0.4439,
401
+ 0.2366,
402
+ 0.2766,
403
+ 0.2168,
404
+ 0.3219,
405
+ 0.3337,
406
+ 0.4465,
407
+ 0.7571,
408
+ 0.257,
409
+ 0.3779,
410
+ 0.1382,
411
+ 0.2154,
412
+ 0.2358,
413
+ 0.4292,
414
+ 0.3343,
415
+ 0.3047,
416
+ 0.8751,
417
+ 0.3987,
418
+ 0.146,
419
+ 0.2943,
420
+ 0.8747,
421
+ 0.1515,
422
+ 0.1481,
423
+ 0.5527,
424
+ 2.8632,
425
+ 0.3923
426
+ ],
427
+ "object_classifier_weight": 0.1,
428
+ "object_prompt_length": 0,
429
+ "real_action_dim": 23,
430
+ "skill_class_weights": [
431
+ 0.0859,
432
+ 0.1021,
433
+ 0.2138,
434
+ 0.1674,
435
+ 0.8973,
436
+ 1.059,
437
+ 1.6727,
438
+ 0.9436,
439
+ 0.1984,
440
+ 1.1787,
441
+ 0.2744,
442
+ 0.621,
443
+ 0.6252,
444
+ 1.8634,
445
+ 0.5304,
446
+ 0.5203,
447
+ 0.8339,
448
+ 0.8988,
449
+ 1.4175,
450
+ 2.2557,
451
+ 0.5537,
452
+ 1.1661,
453
+ 3.0415,
454
+ 0.1975,
455
+ 0.3228,
456
+ 1.2419,
457
+ 0.6963,
458
+ 1.2705,
459
+ 0.4855,
460
+ 1.1792,
461
+ 1.4312,
462
+ 2.6619,
463
+ 1.3428,
464
+ 2.0497
465
+ ],
466
+ "skill_classifier_weight": 0.1,
467
+ "skill_prompt_length": 32,
468
+ "soft_prompt_length": 32,
469
+ "task_class_weights": [
470
+ 2.173,
471
+ 1.4191,
472
+ 0.8104,
473
+ 0.8292,
474
+ 0.8919,
475
+ 0.9968,
476
+ 1.1146,
477
+ 0.707,
478
+ 1.029,
479
+ 0.8308,
480
+ 1.2263,
481
+ 0.928,
482
+ 1.0689,
483
+ 0.6989,
484
+ 0.8124,
485
+ 0.8448,
486
+ 0.8113,
487
+ 1.0045,
488
+ 0.9249,
489
+ 0.9423,
490
+ 0.8886,
491
+ 0.7005,
492
+ 1.1158,
493
+ 0.6354,
494
+ 0.6881,
495
+ 0.8477,
496
+ 0.5946,
497
+ 0.7707,
498
+ 0.7753,
499
+ 0.6626,
500
+ 1.0688,
501
+ 1.0711,
502
+ 1.0735,
503
+ 0.9245,
504
+ 2.0169,
505
+ 1.6108,
506
+ 0.8854,
507
+ 1.4132,
508
+ 1.2177,
509
+ 1.0697,
510
+ 1.7812,
511
+ 0.8162,
512
+ 1.2199,
513
+ 0.7956,
514
+ 0.9433,
515
+ 1.0236,
516
+ 1.1092,
517
+ 0.8773,
518
+ 0.6393,
519
+ 0.6997
520
+ ],
521
+ "task_classifier_weight": 0.1,
522
+ "task_prompt_length": 0,
523
+ "task_valid_objects": {
524
+ "0": [
525
+ 47,
526
+ 131
527
+ ],
528
+ "1": [
529
+ 33,
530
+ 62,
531
+ 159
532
+ ],
533
+ "2": [
534
+ 19,
535
+ 38,
536
+ 47,
537
+ 62,
538
+ 121,
539
+ 130
540
+ ],
541
+ "3": [
542
+ 22,
543
+ 27,
544
+ 56,
545
+ 66,
546
+ 124
547
+ ],
548
+ "4": [
549
+ 26,
550
+ 49,
551
+ 90,
552
+ 132,
553
+ 156
554
+ ],
555
+ "5": [
556
+ 19,
557
+ 62,
558
+ 68,
559
+ 104,
560
+ 153
561
+ ],
562
+ "6": [
563
+ 58,
564
+ 161,
565
+ 168
566
+ ],
567
+ "7": [
568
+ 7,
569
+ 12,
570
+ 50,
571
+ 94,
572
+ 151,
573
+ 157
574
+ ],
575
+ "8": [
576
+ 5,
577
+ 64,
578
+ 65,
579
+ 132,
580
+ 152,
581
+ 156
582
+ ],
583
+ "9": [
584
+ 27,
585
+ 34,
586
+ 43,
587
+ 47,
588
+ 62,
589
+ 70,
590
+ 121,
591
+ 132,
592
+ 140,
593
+ 168,
594
+ 170
595
+ ],
596
+ "10": [
597
+ 15,
598
+ 45,
599
+ 46,
600
+ 48,
601
+ 59,
602
+ 114,
603
+ 134,
604
+ 137
605
+ ],
606
+ "11": [
607
+ 5,
608
+ 48,
609
+ 124,
610
+ 156
611
+ ],
612
+ "12": [
613
+ 18,
614
+ 30,
615
+ 40,
616
+ 42,
617
+ 44,
618
+ 48,
619
+ 66,
620
+ 72,
621
+ 112
622
+ ],
623
+ "13": [
624
+ 35,
625
+ 47,
626
+ 53,
627
+ 54,
628
+ 62,
629
+ 132,
630
+ 150,
631
+ 157
632
+ ],
633
+ "14": [
634
+ 8,
635
+ 27,
636
+ 35,
637
+ 36,
638
+ 54,
639
+ 62,
640
+ 66,
641
+ 113
642
+ ],
643
+ "15": [
644
+ 54,
645
+ 62,
646
+ 126
647
+ ],
648
+ "16": [
649
+ 54,
650
+ 62,
651
+ 144
652
+ ],
653
+ "17": [
654
+ 9,
655
+ 47,
656
+ 66
657
+ ],
658
+ "18": [
659
+ 7,
660
+ 62,
661
+ 88,
662
+ 109,
663
+ 133
664
+ ],
665
+ "19": [
666
+ 0,
667
+ 48,
668
+ 55,
669
+ 61,
670
+ 125,
671
+ 135,
672
+ 154
673
+ ],
674
+ "20": [
675
+ 13,
676
+ 28,
677
+ 62,
678
+ 97,
679
+ 101,
680
+ 146,
681
+ 165,
682
+ 168
683
+ ],
684
+ "21": [
685
+ 7,
686
+ 12,
687
+ 14,
688
+ 50,
689
+ 52,
690
+ 62,
691
+ 145,
692
+ 149,
693
+ 158
694
+ ],
695
+ "22": [
696
+ 62,
697
+ 71,
698
+ 86,
699
+ 133
700
+ ],
701
+ "23": [
702
+ 14,
703
+ 62,
704
+ 88,
705
+ 140,
706
+ 144
707
+ ],
708
+ "24": [
709
+ 3,
710
+ 17,
711
+ 19,
712
+ 23,
713
+ 48,
714
+ 93
715
+ ],
716
+ "25": [
717
+ 5,
718
+ 27,
719
+ 66,
720
+ 73,
721
+ 74,
722
+ 124,
723
+ 164
724
+ ],
725
+ "26": [
726
+ 19,
727
+ 21,
728
+ 31,
729
+ 47,
730
+ 62,
731
+ 121,
732
+ 147,
733
+ 168
734
+ ],
735
+ "27": [
736
+ 16,
737
+ 24,
738
+ 45,
739
+ 62,
740
+ 87,
741
+ 106,
742
+ 132,
743
+ 137,
744
+ 138,
745
+ 139,
746
+ 155,
747
+ 163
748
+ ],
749
+ "28": [
750
+ 50,
751
+ 51,
752
+ 62,
753
+ 63,
754
+ 95,
755
+ 102,
756
+ 103,
757
+ 110,
758
+ 117,
759
+ 132,
760
+ 148
761
+ ],
762
+ "29": [
763
+ 7,
764
+ 14,
765
+ 50,
766
+ 57,
767
+ 62,
768
+ 63,
769
+ 96,
770
+ 115,
771
+ 117,
772
+ 118,
773
+ 119,
774
+ 132,
775
+ 142
776
+ ],
777
+ "30": [
778
+ 47,
779
+ 60,
780
+ 62,
781
+ 98,
782
+ 108,
783
+ 169
784
+ ],
785
+ "31": [
786
+ 25,
787
+ 48,
788
+ 132,
789
+ 167
790
+ ],
791
+ "32": [
792
+ 6,
793
+ 48,
794
+ 132,
795
+ 167
796
+ ],
797
+ "33": [
798
+ 20,
799
+ 141,
800
+ 149,
801
+ 151,
802
+ 167
803
+ ],
804
+ "34": [
805
+ 5,
806
+ 62,
807
+ 128,
808
+ 166
809
+ ],
810
+ "35": [
811
+ 32,
812
+ 53,
813
+ 62
814
+ ],
815
+ "36": [
816
+ 29,
817
+ 62,
818
+ 69,
819
+ 105,
820
+ 132
821
+ ],
822
+ "37": [
823
+ 50,
824
+ 62,
825
+ 136,
826
+ 162
827
+ ],
828
+ "38": [
829
+ 62,
830
+ 69,
831
+ 92,
832
+ 129,
833
+ 132
834
+ ],
835
+ "39": [
836
+ 62,
837
+ 120,
838
+ 161
839
+ ],
840
+ "40": [
841
+ 5,
842
+ 100,
843
+ 127
844
+ ],
845
+ "41": [
846
+ 30,
847
+ 37,
848
+ 39,
849
+ 48,
850
+ 49,
851
+ 56,
852
+ 66,
853
+ 67,
854
+ 75,
855
+ 89,
856
+ 124,
857
+ 132
858
+ ],
859
+ "42": [
860
+ 5,
861
+ 22,
862
+ 49,
863
+ 56,
864
+ 82,
865
+ 116,
866
+ 132,
867
+ 165
868
+ ],
869
+ "43": [
870
+ 10,
871
+ 11,
872
+ 48,
873
+ 49,
874
+ 56,
875
+ 66,
876
+ 84,
877
+ 116,
878
+ 132,
879
+ 171
880
+ ],
881
+ "44": [
882
+ 2,
883
+ 41,
884
+ 76,
885
+ 77,
886
+ 78,
887
+ 79,
888
+ 85,
889
+ 99
890
+ ],
891
+ "45": [
892
+ 48,
893
+ 62,
894
+ 66,
895
+ 91,
896
+ 100
897
+ ],
898
+ "46": [
899
+ 30,
900
+ 48,
901
+ 62,
902
+ 66,
903
+ 67,
904
+ 160
905
+ ],
906
+ "47": [
907
+ 1,
908
+ 20,
909
+ 48,
910
+ 62,
911
+ 66,
912
+ 124,
913
+ 164
914
+ ],
915
+ "48": [
916
+ 19,
917
+ 22,
918
+ 37,
919
+ 48,
920
+ 49,
921
+ 56,
922
+ 66,
923
+ 81,
924
+ 122,
925
+ 143
926
+ ],
927
+ "49": [
928
+ 4,
929
+ 5,
930
+ 22,
931
+ 37,
932
+ 42,
933
+ 56,
934
+ 62,
935
+ 66,
936
+ 80,
937
+ 83,
938
+ 107,
939
+ 111,
940
+ 123,
941
+ 132,
942
+ 156,
943
+ 164,
944
+ 165
945
+ ]
946
+ },
947
+ "task_valid_skills": {
948
+ "0": [
949
+ 0,
950
+ 1,
951
+ 2,
952
+ 19
953
+ ],
954
+ "1": [
955
+ 0,
956
+ 1,
957
+ 2,
958
+ 3
959
+ ],
960
+ "2": [
961
+ 0,
962
+ 1,
963
+ 3,
964
+ 7,
965
+ 9,
966
+ 24
967
+ ],
968
+ "3": [
969
+ 0,
970
+ 1,
971
+ 3,
972
+ 8,
973
+ 10,
974
+ 23
975
+ ],
976
+ "4": [
977
+ 0,
978
+ 1,
979
+ 3,
980
+ 4,
981
+ 8,
982
+ 10,
983
+ 11,
984
+ 12,
985
+ 26
986
+ ],
987
+ "5": [
988
+ 0,
989
+ 1,
990
+ 24
991
+ ],
992
+ "6": [
993
+ 0,
994
+ 1,
995
+ 2,
996
+ 24
997
+ ],
998
+ "7": [
999
+ 0,
1000
+ 1,
1001
+ 2,
1002
+ 3,
1003
+ 4,
1004
+ 23
1005
+ ],
1006
+ "8": [
1007
+ 0,
1008
+ 1,
1009
+ 3,
1010
+ 8,
1011
+ 10,
1012
+ 23,
1013
+ 25,
1014
+ 26
1015
+ ],
1016
+ "9": [
1017
+ 0,
1018
+ 1,
1019
+ 2,
1020
+ 14,
1021
+ 23,
1022
+ 24
1023
+ ],
1024
+ "10": [
1025
+ 0,
1026
+ 1,
1027
+ 2,
1028
+ 3,
1029
+ 23,
1030
+ 24
1031
+ ],
1032
+ "11": [
1033
+ 0,
1034
+ 1,
1035
+ 2,
1036
+ 3,
1037
+ 8,
1038
+ 10
1039
+ ],
1040
+ "12": [
1041
+ 0,
1042
+ 1,
1043
+ 3,
1044
+ 8,
1045
+ 10,
1046
+ 23,
1047
+ 24
1048
+ ],
1049
+ "13": [
1050
+ 0,
1051
+ 1,
1052
+ 3,
1053
+ 8,
1054
+ 11,
1055
+ 12,
1056
+ 23
1057
+ ],
1058
+ "14": [
1059
+ 0,
1060
+ 1,
1061
+ 2,
1062
+ 3,
1063
+ 4,
1064
+ 8,
1065
+ 10,
1066
+ 12,
1067
+ 14,
1068
+ 30
1069
+ ],
1070
+ "15": [
1071
+ 0,
1072
+ 1,
1073
+ 2,
1074
+ 8
1075
+ ],
1076
+ "16": [
1077
+ 0,
1078
+ 1,
1079
+ 2,
1080
+ 8
1081
+ ],
1082
+ "17": [
1083
+ 0,
1084
+ 1,
1085
+ 2,
1086
+ 4,
1087
+ 8,
1088
+ 10
1089
+ ],
1090
+ "18": [
1091
+ 0,
1092
+ 1,
1093
+ 2,
1094
+ 4,
1095
+ 23,
1096
+ 24
1097
+ ],
1098
+ "19": [
1099
+ 0,
1100
+ 1,
1101
+ 2,
1102
+ 3,
1103
+ 11,
1104
+ 12,
1105
+ 23
1106
+ ],
1107
+ "20": [
1108
+ 0,
1109
+ 1,
1110
+ 2,
1111
+ 3,
1112
+ 23
1113
+ ],
1114
+ "21": [
1115
+ 0,
1116
+ 1,
1117
+ 2,
1118
+ 3,
1119
+ 4,
1120
+ 23,
1121
+ 24
1122
+ ],
1123
+ "22": [
1124
+ 0,
1125
+ 1,
1126
+ 3
1127
+ ],
1128
+ "23": [
1129
+ 0,
1130
+ 1,
1131
+ 2,
1132
+ 3,
1133
+ 23
1134
+ ],
1135
+ "24": [
1136
+ 0,
1137
+ 1,
1138
+ 2,
1139
+ 3,
1140
+ 8,
1141
+ 10,
1142
+ 23,
1143
+ 30
1144
+ ],
1145
+ "25": [
1146
+ 0,
1147
+ 1,
1148
+ 2,
1149
+ 3,
1150
+ 8,
1151
+ 10,
1152
+ 23
1153
+ ],
1154
+ "26": [
1155
+ 0,
1156
+ 1,
1157
+ 2,
1158
+ 3,
1159
+ 4,
1160
+ 23
1161
+ ],
1162
+ "27": [
1163
+ 0,
1164
+ 1,
1165
+ 2,
1166
+ 3,
1167
+ 8,
1168
+ 23,
1169
+ 29
1170
+ ],
1171
+ "28": [
1172
+ 0,
1173
+ 1,
1174
+ 2,
1175
+ 4,
1176
+ 23,
1177
+ 24,
1178
+ 26
1179
+ ],
1180
+ "29": [
1181
+ 0,
1182
+ 1,
1183
+ 2,
1184
+ 3,
1185
+ 4,
1186
+ 5,
1187
+ 12,
1188
+ 23,
1189
+ 24,
1190
+ 26
1191
+ ],
1192
+ "30": [
1193
+ 0,
1194
+ 1,
1195
+ 3,
1196
+ 20,
1197
+ 21,
1198
+ 22,
1199
+ 23
1200
+ ],
1201
+ "31": [
1202
+ 0,
1203
+ 1,
1204
+ 3,
1205
+ 8,
1206
+ 10,
1207
+ 20,
1208
+ 26
1209
+ ],
1210
+ "32": [
1211
+ 0,
1212
+ 1,
1213
+ 3,
1214
+ 8,
1215
+ 10,
1216
+ 20,
1217
+ 26
1218
+ ],
1219
+ "33": [
1220
+ 0,
1221
+ 1,
1222
+ 3,
1223
+ 8,
1224
+ 10,
1225
+ 20,
1226
+ 23
1227
+ ],
1228
+ "34": [
1229
+ 0,
1230
+ 1,
1231
+ 4,
1232
+ 18,
1233
+ 23
1234
+ ],
1235
+ "35": [
1236
+ 0,
1237
+ 1,
1238
+ 2,
1239
+ 6,
1240
+ 13,
1241
+ 27
1242
+ ],
1243
+ "36": [
1244
+ 0,
1245
+ 1,
1246
+ 2,
1247
+ 4,
1248
+ 6,
1249
+ 17,
1250
+ 23,
1251
+ 27
1252
+ ],
1253
+ "37": [
1254
+ 0,
1255
+ 1,
1256
+ 2,
1257
+ 4,
1258
+ 16
1259
+ ],
1260
+ "38": [
1261
+ 0,
1262
+ 1,
1263
+ 20,
1264
+ 21,
1265
+ 23,
1266
+ 28
1267
+ ],
1268
+ "39": [
1269
+ 0,
1270
+ 1,
1271
+ 20,
1272
+ 21,
1273
+ 28
1274
+ ],
1275
+ "40": [
1276
+ 0,
1277
+ 1,
1278
+ 3,
1279
+ 8,
1280
+ 10,
1281
+ 20
1282
+ ],
1283
+ "41": [
1284
+ 0,
1285
+ 1,
1286
+ 2,
1287
+ 8,
1288
+ 10,
1289
+ 14,
1290
+ 15,
1291
+ 20,
1292
+ 23,
1293
+ 24,
1294
+ 26
1295
+ ],
1296
+ "42": [
1297
+ 0,
1298
+ 1,
1299
+ 2,
1300
+ 3,
1301
+ 14,
1302
+ 15,
1303
+ 23,
1304
+ 24,
1305
+ 26
1306
+ ],
1307
+ "43": [
1308
+ 0,
1309
+ 1,
1310
+ 2,
1311
+ 8,
1312
+ 10,
1313
+ 15,
1314
+ 23,
1315
+ 26
1316
+ ],
1317
+ "44": [
1318
+ 0,
1319
+ 1,
1320
+ 2,
1321
+ 15,
1322
+ 23,
1323
+ 33
1324
+ ],
1325
+ "45": [
1326
+ 0,
1327
+ 1,
1328
+ 2,
1329
+ 3,
1330
+ 8,
1331
+ 10,
1332
+ 20
1333
+ ],
1334
+ "46": [
1335
+ 0,
1336
+ 1,
1337
+ 2,
1338
+ 8,
1339
+ 10,
1340
+ 14,
1341
+ 20,
1342
+ 24,
1343
+ 26
1344
+ ],
1345
+ "47": [
1346
+ 0,
1347
+ 1,
1348
+ 2,
1349
+ 3,
1350
+ 8,
1351
+ 10
1352
+ ],
1353
+ "48": [
1354
+ 0,
1355
+ 1,
1356
+ 2,
1357
+ 3,
1358
+ 8,
1359
+ 10,
1360
+ 14,
1361
+ 15,
1362
+ 23,
1363
+ 24
1364
+ ],
1365
+ "49": [
1366
+ 0,
1367
+ 1,
1368
+ 2,
1369
+ 3,
1370
+ 8,
1371
+ 10,
1372
+ 14,
1373
+ 15,
1374
+ 20,
1375
+ 23,
1376
+ 24,
1377
+ 26,
1378
+ 31,
1379
+ 32
1380
+ ]
1381
+ },
1382
+ "torch_dtype": "float32",
1383
+ "transformers_version": "4.49.0",
1384
+ "use_hetero_proj": false,
1385
+ "use_proprio": true
1386
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e04770e92b416b2f92d45fdca1b6f2e72061dc006c372079541ffbb75b9e8ead
3
+ size 3626506856
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd470b6cf5eff22e027ff218190b5255ea28eee8e4176ba90aa94ab3ccaa0b46
3
+ size 7243647846
preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "processing_xvla.XVLAProcessor"
4
+ },
5
+ "crop_size": {
6
+ "height": 224,
7
+ "width": 224
8
+ },
9
+ "do_center_crop": false,
10
+ "do_convert_rgb": null,
11
+ "do_normalize": true,
12
+ "do_rescale": true,
13
+ "do_resize": true,
14
+ "image_mean": [
15
+ 0.485,
16
+ 0.456,
17
+ 0.406
18
+ ],
19
+ "image_processor_type": "CLIPImageProcessor",
20
+ "image_std": [
21
+ 0.229,
22
+ 0.224,
23
+ 0.225
24
+ ],
25
+ "processor_class": "XVLAProcessor",
26
+ "resample": 3,
27
+ "rescale_factor": 0.00392156862745098,
28
+ "size": {
29
+ "height": 224,
30
+ "width": 224
31
+ }
32
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"global_step": 100000}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 1024,
53
+ "pad_token": "<pad>",
54
+ "processor_class": "XVLAProcessor",
55
+ "sep_token": "</s>",
56
+ "tokenizer_class": "BartTokenizer",
57
+ "trim_offsets": true,
58
+ "unk_token": "<unk>"
59
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff