---
# Inference configuration for the v2 colorizer-XL model (dual-conditioned SDXL-style
# latent diffusion with a reference/background encoder).
# NOTE(review): reconstructed from a whitespace-collapsed copy — structure inferred
# from the standard ldm/refnet config layout; confirm against a known-good config.
model:
  base_learning_rate: 1.0e-6
  target: refnet.models.v2-colorizerXL.InferenceWrapperXL
  params:
    # Diffusion noise schedule
    linear_start: 0.00085
    linear_end: 0.0120
    timesteps: 1000
    # Latent-space geometry (128x128 latents, 4 channels)
    image_size: 128
    channels: 4
    scale_factor: 0.13025
    controller: true

    # Main denoising UNet (dual-conditioned, XL-sized)
    unet_config:
      target: refnet.modules.unet.DualCondUNetXL
      params:
        use_checkpoint: true
        in_channels: 4
        in_channels_fg: 4
        out_channels: 4
        model_channels: 320
        adm_in_channels: 512
        num_classes: sequential
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        map_module: false
        warp_module: false
        style_modulation: false

    # Background/reference image encoder (cross-attention disabled)
    bg_encoder_config:
      target: refnet.modules.unet.ReferenceNet
      params:
        use_checkpoint: true
        in_channels: 6
        model_channels: 320
        adm_in_channels: 1024
        num_classes: sequential
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        disable_cross_attentions: true
        context_dim: 2048
        transformer_depth: [1, 2, 10]

    # VAE for encoding/decoding between pixel and latent space
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0

    # CLIP vision conditioning encoder
    cond_stage_config:
      target: refnet.modules.embedder.HFCLIPVisionModel
      params:
        arch: ViT-bigG-14

    # Image tagger/embedder (no extra params)
    img_embedder_config:
      target: refnet.modules.embedder.WDv14SwinTransformerV2

    # Control-signal (e.g. sketch/line-art) encoder
    control_encoder_config:
      target: refnet.modules.encoder.MultiScaleAttentionEncoder
      params:
        in_ch: 3
        model_channels: 320
        ch_mults: [1, 2, 4]

    # Projection from embedder features into the UNet context dimension
    proj_config:
      target: refnet.modules.proj.ClusterConcat
      # target: refnet.modules.proj.RecoveryClusterConcat
      params:
        input_dim: 1280
        c_dim: 1024
        output_dim: 2048
        token_length: 196
        dim_head: 128

    # Scalar (timestep-style) embedding module
    scalar_embedder_config:
      target: refnet.modules.embedder.TimestepEmbedding
      params:
        embed_dim: 256

    # LoRA adapters injected into the diffusion UNet's cross-attention projections
    lora_config:
      lora_params:
        - label: background
          root_module: model.diffusion_model
          target_keys: [attn2.to_q, attn2.to_k, attn2.to_v]
          r: 4