{
  "_class_name": "NitroMMDiTModel",
  "_diffusers_version": "0.34.0",
  "attention_head_dim": 64,
  "caption_channels": 2048,
  "caption_projection_dim": 1152,
  "dual_attention_layers": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12
  ],
  "in_channels": 32,
  "interpolation_scale": 2,
  "joint_attention_dim": 4096,
  "num_attention_heads": 18,
  "num_layers": 24,
  "out_channels": 32,
  "patch_size": 1,
  "pooled_projection_dim": 2048,
  "pos_embed_max_size": 96,
  "projector_dim": 2048,
  "qk_norm": "rms_norm",
  "repa_depth": -1,
  "sample_size": 32,
  "z_dims": [
    768
  ]
}