Spaces:
Running on Zero
model:
  base_learning_rate: 1.0e-6
  target: refnet.models.v2-colorizerXL.InferenceWrapperXL
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    timesteps: 1000
    image_size: 128
    channels: 4
    scale_factor: 0.13025
    controller: true

    unet_config:
      target: refnet.modules.unet.DualCondUNetXL
      params:
        use_checkpoint: True
        in_channels: 4
        in_channels_fg: 4
        out_channels: 4
        model_channels: 320
        adm_in_channels: 512
        num_classes: sequential
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        map_module: false
        warp_module: false
        style_modulation: false

    bg_encoder_config:
      target: refnet.modules.unet.ReferenceNet
      params:
        use_checkpoint: True
        in_channels: 6
        model_channels: 320
        adm_in_channels: 1024
        num_classes: sequential
        attention_resolutions: [ 4, 2 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4 ]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        disable_cross_attentions: true
        context_dim: 2048
        transformer_depth: [ 1, 2, 10 ]

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0

    cond_stage_config:
      target: refnet.modules.embedder.HFCLIPVisionModel
      params:
        arch: ViT-bigG-14

    img_embedder_config:
      target: refnet.modules.embedder.WDv14SwinTransformerV2

    control_encoder_config:
      target: refnet.modules.encoder.MultiScaleAttentionEncoder
      params:
        in_ch: 3
        model_channels: 320
        ch_mults: [1, 2, 4]

    proj_config:
      target: refnet.modules.proj.ClusterConcat
      # target: refnet.modules.proj.RecoveryClusterConcat
      params:
        input_dim: 1280
        c_dim: 1024
        output_dim: 2048
        token_length: 196
        dim_head: 128

    scalar_embedder_config:
      target: refnet.modules.embedder.TimestepEmbedding
      params:
        embed_dim: 256

    lora_config:
      lora_params: [
        {
          label: background,
          root_module: model.diffusion_model,
          target_keys: [ attn2.to_q, attn2.to_k, attn2.to_v ],
          r: 4,
        }
      ]