{ "sample_size": 128, "patch_size": 2, "in_channels": 16, "num_layers": 18, "attention_head_dim": 64, "num_attention_heads": 18, "joint_attention_dim": 4096, "caption_projection_dim": 1152, "pooled_projection_dim": 2048, "out_channels": 16, "pos_embed_max_size": 96, "dual_attention_layers": [], "qk_norm": null, "_use_default_values": [ "patch_size", "num_attention_heads", "dual_attention_layers", "sample_size", "joint_attention_dim", "attention_head_dim", "num_layers", "caption_projection_dim", "out_channels", "qk_norm", "pooled_projection_dim", "in_channels", "pos_embed_max_size" ] }