{
  "_class_name": "NucleusMoEImageTransformer2DModel",
  "_diffusers_version": "0.38.0.dev0",
  "patch_size": 2,
  "in_channels": 64,
  "out_channels": 16,
  "num_layers": 32,
  "attention_head_dim": 128,
  "num_attention_heads": 16,
  "num_key_value_heads": 4,
  "joint_attention_dim": 4096,
  "axes_dims_rope": [
    16,
    56,
    56
  ],
  "mlp_ratio": 4.0,
  "moe_enabled": true,
  "dense_moe_strategy": "leave_first_three_blocks_dense",
  "num_experts": 64,
  "moe_intermediate_dim": 1344,
  "capacity_factors": [
    0.0,
    0.0,
    0.0,
    4.0,
    4.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0,
    2.0
  ],
  "use_sigmoid": false,
  "route_scale": 2.5,
  "use_grouped_mm": true
}