| { |
| "_class_name": "ControlNetXSModel", |
| "_diffusers_version": "0.24.0.dev0", |
| "base_model_channel_sizes": { |
| "down": [ |
| [ |
| 4, |
| 320 |
| ], |
| [ |
| 320, |
| 320 |
| ], |
| [ |
| 320, |
| 320 |
| ], |
| [ |
| 320, |
| 320 |
| ], |
| [ |
| 320, |
| 640 |
| ], |
| [ |
| 640, |
| 640 |
| ], |
| [ |
| 640, |
| 640 |
| ], |
| [ |
| 640, |
| 1280 |
| ], |
| [ |
| 1280, |
| 1280 |
| ], |
| [ |
| 1280, |
| 1280 |
| ], |
| [ |
| 1280, |
| 1280 |
| ], |
| [ |
| 1280, |
| 1280 |
| ] |
| ], |
| "mid": [ |
| [ |
| 1280, |
| 1280 |
| ] |
| ], |
| "up": [ |
| [ |
| 2560, |
| 1280 |
| ], |
| [ |
| 2560, |
| 1280 |
| ], |
| [ |
| 2560, |
| 1280 |
| ], |
| [ |
| 2560, |
| 1280 |
| ], |
| [ |
| 2560, |
| 1280 |
| ], |
| [ |
| 1920, |
| 1280 |
| ], |
| [ |
| 1920, |
| 640 |
| ], |
| [ |
| 1280, |
| 640 |
| ], |
| [ |
| 960, |
| 640 |
| ], |
| [ |
| 960, |
| 320 |
| ], |
| [ |
| 640, |
| 320 |
| ], |
| [ |
| 640, |
| 320 |
| ] |
| ] |
| }, |
| "block_out_channels": [ |
| 4, |
| 8, |
| 16, |
| 16 |
| ], |
| "conditioning_channels": 3, |
| "conditioning_embedding_out_channels": [ |
| 16, |
| 32, |
| 96, |
| 256 |
| ], |
| "controlnet_conditioning_channel_order": "rgb", |
| "cross_attention_dim": 1024, |
| "down_block_types": [ |
| "CrossAttnDownBlock2D", |
| "CrossAttnDownBlock2D", |
| "CrossAttnDownBlock2D", |
| "DownBlock2D" |
| ], |
| "learn_embedding": true, |
| "norm_num_groups": 4, |
| "num_attention_heads": [ |
| 1, |
| 1, |
| 2, |
| 2 |
| ], |
| "sample_size": 64, |
| "time_embedding_dim": 1280, |
| "time_embedding_input_dim": 320, |
| "time_embedding_mix": 1.0, |
| "transformer_layers_per_block": 1, |
| "up_block_types": [ |
| "UpBlock2D", |
| "CrossAttnUpBlock2D", |
| "CrossAttnUpBlock2D", |
| "CrossAttnUpBlock2D" |
| ], |
| "upcast_attention": false |
| } |
|
|