| { | |
| "_class_name": "ControlNetXSModel", | |
| "_diffusers_version": "0.24.0.dev0", | |
| "base_model_channel_sizes": { | |
| "down": [ | |
| [ | |
| 4, | |
| 320 | |
| ], | |
| [ | |
| 320, | |
| 320 | |
| ], | |
| [ | |
| 320, | |
| 320 | |
| ], | |
| [ | |
| 320, | |
| 320 | |
| ], | |
| [ | |
| 320, | |
| 640 | |
| ], | |
| [ | |
| 640, | |
| 640 | |
| ], | |
| [ | |
| 640, | |
| 640 | |
| ], | |
| [ | |
| 640, | |
| 1280 | |
| ], | |
| [ | |
| 1280, | |
| 1280 | |
| ] | |
| ], | |
| "mid": [ | |
| [ | |
| 1280, | |
| 1280 | |
| ] | |
| ], | |
| "up": [ | |
| [ | |
| 2560, | |
| 1280 | |
| ], | |
| [ | |
| 2560, | |
| 1280 | |
| ], | |
| [ | |
| 1920, | |
| 1280 | |
| ], | |
| [ | |
| 1920, | |
| 640 | |
| ], | |
| [ | |
| 1280, | |
| 640 | |
| ], | |
| [ | |
| 960, | |
| 640 | |
| ], | |
| [ | |
| 960, | |
| 320 | |
| ], | |
| [ | |
| 640, | |
| 320 | |
| ], | |
| [ | |
| 640, | |
| 320 | |
| ] | |
| ] | |
| }, | |
| "block_out_channels": [ | |
| 32, | |
| 64, | |
| 128 | |
| ], | |
| "conditioning_channels": 3, | |
| "conditioning_embedding_out_channels": [ | |
| 16, | |
| 32, | |
| 96, | |
| 256 | |
| ], | |
| "controlnet_conditioning_channel_order": "rgb", | |
| "cross_attention_dim": 2048, | |
| "down_block_types": [ | |
| "DownBlock2D", | |
| "CrossAttnDownBlock2D", | |
| "CrossAttnDownBlock2D" | |
| ], | |
| "learn_embedding": true, | |
| "norm_num_groups": 32, | |
| "num_attention_heads": [ | |
| 1, | |
| 1, | |
| 2 | |
| ], | |
| "sample_size": 128, | |
| "time_embedding_dim": 1280, | |
| "time_embedding_input_dim": 320, | |
| "time_embedding_mix": 0.95, | |
| "transformer_layers_per_block": [ | |
| 1, | |
| 2, | |
| 10 | |
| ], | |
| "up_block_types": [ | |
| "CrossAttnUpBlock2D", | |
| "CrossAttnUpBlock2D", | |
| "UpBlock2D" | |
| ], | |
| "upcast_attention": null | |
| } | |