{
  "_class_name": "BEVControlNetModel",
  "_diffusers_version": "0.17.1",
  "act_fn": "silu",
  "attention_head_dim": 8,
  "bbox_embedder_cls": "dreamforge.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding",
  "bbox_embedder_param": {
    "class_token_dim": 768,
    "embedder_num_freq": 4,
    "minmax_normalize": false,
    "mode": "all-xyz",
    "n_classes": 10,
    "proj_dims": [
      768,
      512,
      512,
      768
    ],
    "trainable_class_token": false,
    "use_text_encoder_init": true
  },
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "cam_embedder_param": {
    "include_input": true,
    "input_dims": 3,
    "log_sampling": true,
    "num_freqs": 4
  },
  "camera_in_dim": 189,
  "camera_out_dim": 768,
  "canvas_conditioning_channels": 14,
  "class_embed_type": null,
  "conditioning_embedding_out_channels": [
    16,
    32,
    96,
    256
  ],
  "controlnet_conditioning_channel_order": "rgb",
  "cross_attention_dim": 768,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "drop_cam_num": 6,
  "drop_cam_with_box": false,
  "drop_cond_ratio": 0.25,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "global_pool_conditions": false,
  "in_channels": 4,
  "layers_per_block": 2,
  "map_embedder_cls": null,
  "map_embedder_param": null,
  "map_size": [
    4,
    200,
    200
  ],
  "mid_block_scale_factor": 1,
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_class_embeds": null,
  "only_cross_attention": false,
  "projection_class_embeddings_input_dim": null,
  "resnet_time_scale_shift": "default",
  "uncond_cam_in_dim": [
    3,
    7
  ],
  "upcast_attention": false,
  "use_linear_projection": false,
  "use_uncond_map": null,
  "with_layout_canvas": true
}
|