{ "dtype": "bfloat16", "dit_num_layers": 18, "dit_num_heads": 8, "dit_head_dim": 128, "dit_dropout": 0.0, "dit_cross_attention_dim": null, "dit_interleave_self_attention": true, "dit_layerwise_vlm_features": false, "chunk_size": 50, "n_action_steps": 50, "max_state_dim": 32, "max_action_dim": 32, "num_inference_steps": 10, "image_resolution": [ 224, 224 ], "attn_implementation": "flash_attention_2", "tokenizer_max_length": 48, "training_phase": "posttrain", "action_mode": "delta", "knowledge_isolation": true, "use_fast_tokenizer": true, "discrete_action_vocab_size": 2048 }