{
  "act_layer": "SiLU",
  "decoder_depth": 24,
  "dim": 1024,
  "domains_in": [
    "caption",
    "t5_caption",
    "det",
    "metadata",
    "human_poses",
    "color_palette",
    "sam_instance",
    "rgb@224",
    "tok_rgb@224",
    "tok_normal@224",
    "tok_depth@224",
    "tok_semseg@224",
    "tok_clip@224",
    "tok_dinov2@224",
    "tok_dinov2_global",
    "tok_imagebind@224",
    "tok_imagebind_global",
    "tok_sam_edge@224",
    "tok_canny_edge@224"
  ],
  "domains_out": [
    "caption",
    "t5_caption",
    "det",
    "metadata",
    "human_poses",
    "color_palette",
    "sam_instance",
    "tok_rgb@224",
    "tok_normal@224",
    "tok_depth@224",
    "tok_semseg@224",
    "tok_clip@224",
    "tok_dinov2@224",
    "tok_dinov2_global",
    "tok_imagebind@224",
    "tok_imagebind_global",
    "tok_sam_edge@224",
    "tok_canny_edge@224"
  ],
  "encoder_depth": 24,
  "gated_mlp": true,
  "image_size": 224,
  "mlp_bias": false,
  "mlp_ratio": 4,
  "norm_bias": false,
  "num_heads": 16,
  "patch_size": 16,
  "proj_bias": false,
  "qkv_bias": false,
  "share_modality_embeddings": false
}