{ "action_mode": "libero_joint", "alignment_action_token_layer": 3, "alignment_controllable_source": "action_tokens", "alignment_loss_weight": 1.0, "alignment_objective": "align", "alignment_pooling": "concat", "alignment_projector_dim": 768, "alignment_target_mode": "controllable", "architectures": [ "SmolVLMVLA" ], "depth": 12, "dim_time": 32, "dtype": "float32", "hidden_size": 768, "image_size": 384, "lam_frame_interval": 5, "lam_latent_dim": 512, "max_len_seq": 512, "mlp_ratio": 4.0, "model_type": "smolvlm_vla", "num_actions": 50, "num_heads": 12, "num_views": 3, "precomputed_lam_controllable_key": "lam_latent_actions_controllable", "precomputed_lam_uncontrollable_key": "lam_latent_actions_uncontrollable", "smolvlm_model_path": "HuggingFaceTB/SmolVLM-500M-Instruct", "transformers_version": "5.2.0", "use_adaln": false, "use_jit_x_prediction_v_loss": false, "use_proprio": true }