{ "vision_backbone": "dinov2_small", "language_backbone": "smollm_135m", "vision_model": "facebook/dinov2-small", "vision_pretrained": true, "freeze_vision": true, "num_visual_tokens": 258, "vision_output_dim": 384, "image_size": 224, "language_hidden_size": 576, "language_num_layers": 30, "language_num_heads": 9, "language_vocab_size": 49161, "language_max_length": 1024, "freeze_language_base": true, "unfreeze_last_layer": true, "use_pretrained_language": true, "pretrained_language_model": "HuggingFaceTB/SmolLM-135M", "fusion_bottleneck_dim": 48, "fusion_dropout": 0.1, "use_qk_norm": true, "reasoning_enabled": true, "reasoning_hidden_dim": 192, "reasoning_num_layers": 2, "reasoning_num_heads": 4, "num_reasoning_steps": 4, "max_plan_steps": 5, "num_robots": 5, "robot_names": [ "Drone", "Humanoid", "Wheeled", "Legged", "Underwater" ], "special_tokens": { "reasoning_start": "<|reasoning_start|>", "reasoning_end": "<|reasoning_end|>", "robot_selection": "<|robot_selection|>", "action_plan": "<|action_plan|>", "image_start": "", "image_end": "", "question_start": "", "question_end": "", "answer_start": "" }, "dropout": 0.1, "initializer_range": 0.02, "use_va_refiner": false, "va_p_threshold": 0.7, "va_layer_indices": [ 10, 15, 20, 25, 28 ], "vocab_size": 49161 }