{ "hidden_size": 256, "num_attention_heads": 2, "attention_head_dim": 128, "in_channels": 16, "patch_size": 1, "joint_attention_dim": 768, "pooled_projection_dim": 768, "num_double_layers": 3, "num_single_layers": 3, "mlp_ratio": 4.0, "axes_dims_rope": [ 16, 56, 56 ], "guidance_embeds": true }