| num_layers: 32 | |
| num_heads: 8 | |
| d_model: 256 | |
| T: 16 | |
| S: 256 | |
| image_vocab_size: 262144 | |
| use_mup: false | |
| num_factored_vocabs: 2 | |
| factored_vocab_size: 512 | |
| max_corrupt_rate: 0.2 | |
| non_mlm_ratio: 0.5 | |
| num_prompt_frames: 8 | |
| qkv_bias: false | |
| proj_bias: true | |
| attn_drop: 0 | |
| qk_norm: false | |
| mlp_ratio: 4 | |
| mlp_drop: 0 | |
| mlp_bias: true | |