{ "model_type": "humanoid-vision", "image_size": 224, "hidden_size": 384, "num_layers": 6, "num_attention_heads": 6, "description": "Vision-based perception model for humanoid robots" }