{
"vision_backbone": "dinov2_small",
"language_backbone": "smollm_135m",
"vision_model": "facebook/dinov2-small",
"vision_pretrained": true,
"freeze_vision": true,
"num_visual_tokens": 258,
"vision_output_dim": 384,
"image_size": 224,
"language_hidden_size": 576,
"language_num_layers": 30,
"language_num_heads": 9,
"language_vocab_size": 49161,
"language_max_length": 1024,
"freeze_language_base": true,
"unfreeze_last_layer": true,
"use_pretrained_language": true,
"pretrained_language_model": "HuggingFaceTB/SmolLM-135M",
"fusion_bottleneck_dim": 48,
"fusion_dropout": 0.1,
"use_qk_norm": true,
"reasoning_enabled": true,
"reasoning_hidden_dim": 192,
"reasoning_num_layers": 2,
"reasoning_num_heads": 4,
"num_reasoning_steps": 4,
"max_plan_steps": 5,
"num_robots": 5,
"robot_names": [
"Drone",
"Humanoid",
"Wheeled",
"Legged",
"Underwater"
],
"special_tokens": {
"reasoning_start": "<|reasoning_start|>",
"reasoning_end": "<|reasoning_end|>",
"robot_selection": "<|robot_selection|>",
"action_plan": "<|action_plan|>",
"image_start": "",
"image_end": "",
"question_start": "",
"question_end": "",
"answer_start": ""
},
"dropout": 0.1,
"initializer_range": 0.02,
"use_va_refiner": false,
"va_p_threshold": 0.7,
"va_layer_indices": [
10,
15,
20,
25,
28
],
"vocab_size": 49161
}