{
  "vid_in_channels": 33,
  "vid_out_channels": 16,
  "vid_dim": 2560,
  "vid_out_norm": "rms",
  "txt_in_dim": 5120,
  "txt_in_norm": "layer",
  "txt_dim": 2560,
  "emb_dim": 15360,
  "heads": 20,
  "head_dim": 128,
  "expand_ratio": 4,
  "norm": "rms",
  "norm_eps": 1.0e-06,
  "ada": "single",
  "qk_bias": false,
  "qk_norm": "rms",
  "patch_size": [1, 2, 2],
  "num_layers": 32,
  "mm_layers": 10,
  "mlp_type": "swiglu",
  "msa_type": null,
  "rope_type": "mmrope3d",
  "rope_dim": 128
}