{ "depth": 27, "hidden_size": 1152, "hidden_act": "gelu_pytorch_tanh", "intermediate_size": 4304, "num_heads": 16, "in_channels": 3, "patch_size": 16, "spatial_merge_size": 2, "temporal_patch_size": 2, "out_hidden_size": 4096, "initializer_range": 0.02, "use_bsq": true, "bsq_dim": 64, "bsq_hidden_dim": 8192, "bsq_skip_final_layernorm": true, "vistok_pred": false, "vistok_pred_layernorm": false, "vistok_pred_transformer_head": false, "architectures": [ "UniARVisionModel" ], "model_type": "uniar_vision", "deepstack_visual_indexes": [ 8, 16, 24 ], "num_position_embeddings": 2304 }