{
  "resolution_mode": "native",
  "min_tokens": 256,
  "max_tokens": 16384,
  "patch_size": 14,
  "resize_factor": 2,
  "spatial_merge_size": 1,
  "temporal_patch_size": 2,
  "num_hidden_layers": 24,
  "num_attention_heads": 16,
  "hidden_size": 1024,
  "intermediate_size": 4224,
  "pe_type": "rope2d",
  "norm_type": "RMSNorm",
  "hidden_act": "SwiGLU",
  "init_method": "xavier",
  "image_mean": [0.485, 0.456, 0.406],
  "image_std": [0.229, 0.224, 0.225]
}