diff --git a/cldm_v15.yaml b/cldm_v15.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/cldm_v15.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/cldm_v21.yaml b/cldm_v21.yaml deleted file mode 100755 index fc65193647e476e108fce5977f11250d55919106..0000000000000000000000000000000000000000 --- a/cldm_v21.yaml +++ /dev/null @@ -1,85 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - use_checkpoint: True - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - use_checkpoint: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" diff --git a/control_sd15_canny.yaml b/control_sd15_canny.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_canny.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_depth.yaml b/control_sd15_depth.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_depth.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_hed.yaml b/control_sd15_hed.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_hed.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_mlsd.yaml b/control_sd15_mlsd.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_mlsd.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_normal.yaml b/control_sd15_normal.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_normal.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_openpose.yaml b/control_sd15_openpose.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_openpose.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_scribble.yaml b/control_sd15_scribble.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_scribble.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_sd15_seg.yaml b/control_sd15_seg.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_sd15_seg.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11e_sd15_ip2p.yaml b/control_v11e_sd15_ip2p.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11e_sd15_ip2p.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11e_sd15_ip2p_fp16.yaml b/control_v11e_sd15_ip2p_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11e_sd15_ip2p_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11e_sd15_shuffle.yaml b/control_v11e_sd15_shuffle.yaml deleted file mode 100755 index 862304b0090bf65984473c30ab0ebc30a4858400..0000000000000000000000000000000000000000 --- a/control_v11e_sd15_shuffle.yaml +++ /dev/null @@ -1,80 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - global_average_pooling: True - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11e_sd15_shuffle_fp16.yaml b/control_v11e_sd15_shuffle_fp16.yaml deleted file mode 100755 index 862304b0090bf65984473c30ab0ebc30a4858400..0000000000000000000000000000000000000000 --- a/control_v11e_sd15_shuffle_fp16.yaml +++ /dev/null @@ -1,80 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - global_average_pooling: True - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11f1e_sd15_tile.yaml b/control_v11f1e_sd15_tile.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11f1e_sd15_tile.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11f1e_sd15_tile_fp16.yaml b/control_v11f1e_sd15_tile_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11f1e_sd15_tile_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11f1p_sd15_depth.yaml b/control_v11f1p_sd15_depth.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11f1p_sd15_depth.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11f1p_sd15_depth_fp16.yaml b/control_v11f1p_sd15_depth_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11f1p_sd15_depth_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_canny.yaml b/control_v11p_sd15_canny.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_canny.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_canny_fp16.yaml b/control_v11p_sd15_canny_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_canny_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_inpaint.yaml b/control_v11p_sd15_inpaint.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_inpaint.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_inpaint_fp16.yaml b/control_v11p_sd15_inpaint_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_inpaint_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_lineart.yaml b/control_v11p_sd15_lineart.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_lineart.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_lineart_fp16.yaml b/control_v11p_sd15_lineart_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_lineart_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_mlsd.yaml b/control_v11p_sd15_mlsd.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_mlsd.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_mlsd_fp16.yaml b/control_v11p_sd15_mlsd_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_mlsd_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_normalbae.yaml b/control_v11p_sd15_normalbae.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_normalbae.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_normalbae_fp16.yaml b/control_v11p_sd15_normalbae_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_normalbae_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_openpose.yaml b/control_v11p_sd15_openpose.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_openpose.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_openpose_fp16.yaml b/control_v11p_sd15_openpose_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_openpose_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_scribble.yaml b/control_v11p_sd15_scribble.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_scribble.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_scribble_fp16.yaml b/control_v11p_sd15_scribble_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_scribble_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_seg.yaml b/control_v11p_sd15_seg.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_seg.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_seg_fp16.yaml b/control_v11p_sd15_seg_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_seg_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_softedge.yaml b/control_v11p_sd15_softedge.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_softedge.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15_softedge_fp16.yaml b/control_v11p_sd15_softedge_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15_softedge_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15s2_lineart_anime.yaml b/control_v11p_sd15s2_lineart_anime.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15s2_lineart_anime.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v11p_sd15s2_lineart_anime_fp16.yaml b/control_v11p_sd15s2_lineart_anime_fp16.yaml deleted file mode 100755 index fde1825577acd46dc90d8d7c6730e22be762fccb..0000000000000000000000000000000000000000 --- a/control_v11p_sd15s2_lineart_anime_fp16.yaml +++ /dev/null @@ -1,79 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/control_v1p_sd15_qrcode_monster.yaml b/control_v1p_sd15_qrcode_monster.yaml deleted file mode 100755 index 05dc29ec08445c5701104c66a676df763ec066a5..0000000000000000000000000000000000000000 --- a/control_v1p_sd15_qrcode_monster.yaml +++ /dev/null @@ -1,80 +0,0 @@ -model: - target: cldm.cldm.ControlLDM - params: - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: "jpg" - cond_stage_key: "txt" - control_key: "hint" - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - only_mid_control: False - - control_stage_config: - target: cldm.cldm.ControlNet - params: - image_size: 32 # unused - in_channels: 4 - hint_channels: 3 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - unet_config: - target: cldm.cldm.ControlledUnetModel - params: - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_heads: 8 - use_spatial_transformer: True - transformer_depth: 1 - context_dim: 768 - use_checkpoint: True - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.FrozenCLIPEmbedder - diff --git a/image_adapter_v14.yaml b/image_adapter_v14.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/image_adapter_v14.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/sketch_adapter_v14.yaml b/sketch_adapter_v14.yaml deleted file mode 100755 index 686c5f172bf941ffaaee58b912245d6ffb36f4d3..0000000000000000000000000000000000000000 --- a/sketch_adapter_v14.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 64 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_canny_sd14v1.yaml b/t2iadapter_canny_sd14v1.yaml deleted file mode 100755 index 686c5f172bf941ffaaee58b912245d6ffb36f4d3..0000000000000000000000000000000000000000 --- a/t2iadapter_canny_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 64 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_canny_sd15v2.yaml b/t2iadapter_canny_sd15v2.yaml deleted file mode 100755 index 686c5f172bf941ffaaee58b912245d6ffb36f4d3..0000000000000000000000000000000000000000 --- a/t2iadapter_canny_sd15v2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 64 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_color_sd14v1.yaml b/t2iadapter_color_sd14v1.yaml deleted file mode 100755 index 994708a079bba499d435e10eec3504a9cd4f8d0c..0000000000000000000000000000000000000000 --- a/t2iadapter_color_sd14v1.yaml +++ /dev/null @@ -1,6 +0,0 @@ -model: - target: scripts.adapter.Adapter_light - params: - channels: [320, 640, 1280, 1280] - nums_rb: 4 - cin: 192 \ No newline at end of file diff --git a/t2iadapter_depth_sd14v1.yaml b/t2iadapter_depth_sd14v1.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_depth_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_depth_sd15v2.yaml b/t2iadapter_depth_sd15v2.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_depth_sd15v2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_keypose_sd14v1.yaml b/t2iadapter_keypose_sd14v1.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_keypose_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_openpose_sd14v1.yaml b/t2iadapter_openpose_sd14v1.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_openpose_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_seg_sd14v1.yaml b/t2iadapter_seg_sd14v1.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_seg_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_sketch_sd14v1.yaml b/t2iadapter_sketch_sd14v1.yaml deleted file mode 100755 index 686c5f172bf941ffaaee58b912245d6ffb36f4d3..0000000000000000000000000000000000000000 --- a/t2iadapter_sketch_sd14v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 64 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_sketch_sd15v2.yaml b/t2iadapter_sketch_sd15v2.yaml deleted file mode 100755 index 686c5f172bf941ffaaee58b912245d6ffb36f4d3..0000000000000000000000000000000000000000 --- a/t2iadapter_sketch_sd15v2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 64 - use_conv: false \ No newline at end of file diff --git a/t2iadapter_style_sd14v1.yaml b/t2iadapter_style_sd14v1.yaml deleted file mode 100755 index 69bcc41a1152e9bfffeac20ba77baf378336a7a0..0000000000000000000000000000000000000000 --- a/t2iadapter_style_sd14v1.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: - target: scripts.adapter.StyleAdapter - params: - width: 1024 - context_dim: 768 - num_head: 8 - n_layes: 3 - num_token: 8 \ No newline at end of file diff --git a/t2iadapter_zoedepth_sd15v1.yaml b/t2iadapter_zoedepth_sd15v1.yaml deleted file mode 100755 index 439d33cc53a349c9b8c1a0091cbd3643359216d5..0000000000000000000000000000000000000000 --- a/t2iadapter_zoedepth_sd15v1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -model: - target: tencentarc.t21_adapter - params: - channels: [320, 640, 1280, 1280] - nums_rb: 2 - ksize: 1 - sk: true - cin: 192 - use_conv: false \ No newline at end of file