{
  "action_mode": "libero_joint",
  "alignment_action_token_layer": 3,
  "alignment_controllable_source": "action_tokens",
  "alignment_loss_weight": 1.0,
  "alignment_objective": "align",
  "alignment_pooling": "concat",
  "alignment_projector_dim": 768,
  "alignment_target_mode": "controllable",
  "architectures": [
    "SmolVLMVLA"
  ],
  "depth": 12,
  "dim_time": 32,
  "dtype": "float32",
  "hidden_size": 768,
  "image_size": 384,
  "lam_frame_interval": 5,
  "lam_latent_dim": 512,
  "max_len_seq": 512,
  "mlp_ratio": 4.0,
  "model_type": "smolvlm_vla",
  "num_actions": 50,
  "num_heads": 12,
  "num_views": 3,
  "precomputed_lam_controllable_key": "lam_latent_actions_controllable",
  "precomputed_lam_uncontrollable_key": "lam_latent_actions_uncontrollable",
  "smolvlm_model_path": "HuggingFaceTB/SmolVLM-500M-Instruct",
  "transformers_version": "5.2.0",
  "use_adaln": false,
  "use_jit_x_prediction_v_loss": false,
  "use_proprio": true
}