diff --git a/models/checkpoints/put_checkpoints_here b/models/checkpoints/put_checkpoints_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/clip/EVA02_CLIP_L_336_psz14_s6B.pt b/models/clip/EVA02_CLIP_L_336_psz14_s6B.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea0ba9075b54a7fec1ee6ebb5eff0557c511d347
--- /dev/null
+++ b/models/clip/EVA02_CLIP_L_336_psz14_s6B.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c3a17a228c567a155259b2245b0b59072bf7da510260a0a02ec54de6d50b05
+size 856461210
diff --git a/models/clip/clip_l.safetensors b/models/clip/clip_l.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e224fcda070dc105fd83c64ed2074f47a1b0ff7b
--- /dev/null
+++ b/models/clip/clip_l.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
+size 246144152
diff --git a/models/clip/put_clip_or_text_encoder_models_here b/models/clip/put_clip_or_text_encoder_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/clip/t5xxl_fp8_e4m3fn.safetensors b/models/clip/t5xxl_fp8_e4m3fn.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..79b9ebe64019308077904fa68ed6f707569121c0
--- /dev/null
+++ b/models/clip/t5xxl_fp8_e4m3fn.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d330da4816157540d6bb7838bf63a0f02f573fc48ca4d8de34bb0cbfd514f09
+size 4893934904
diff --git a/models/clip_vision/model.safetensors b/models/clip_vision/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cacafd1899a1fbf021fe1eeaafebc9bc6d2a9c07
--- /dev/null
+++ b/models/clip_vision/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2bf730a0c7debf160f7a6b50b3aaf3703e7e88ac73de7a314903141db026dcb
+size 1710540580
diff --git a/models/clip_vision/put_clip_vision_models_here b/models/clip_vision/put_clip_vision_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/configs/anything_v3.yaml b/models/configs/anything_v3.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34
--- /dev/null
+++ b/models/configs/anything_v3.yaml
@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
diff --git a/models/configs/v1-inference.yaml b/models/configs/v1-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4effe569e897369918625f9d8be5603a0e6a0d6
--- /dev/null
+++ b/models/configs/v1-inference.yaml
@@ -0,0 +1,70 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
diff --git a/models/configs/v1-inference_clip_skip_2.yaml b/models/configs/v1-inference_clip_skip_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34
--- /dev/null
+++ b/models/configs/v1-inference_clip_skip_2.yaml
@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
diff --git a/models/configs/v1-inference_clip_skip_2_fp16.yaml b/models/configs/v1-inference_clip_skip_2_fp16.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7eca31c7b5e571c2b1348e94ed9d69978ebd2d52
--- /dev/null
+++ b/models/configs/v1-inference_clip_skip_2_fp16.yaml
@@ -0,0 +1,74 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
diff --git a/models/configs/v1-inference_fp16.yaml b/models/configs/v1-inference_fp16.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..147f42b17b835cc839338156f99e8f971df5c1aa
--- /dev/null
+++ b/models/configs/v1-inference_fp16.yaml
@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
diff --git a/models/configs/v1-inpainting-inference.yaml b/models/configs/v1-inpainting-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45f3f82d461cd8c6109f26ec3b1da75366eda0b0
--- /dev/null
+++ b/models/configs/v1-inpainting-inference.yaml
@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 7.5e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: hybrid # important
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    finetune_keys: null
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 9 # 4 data + 4 downscaled image + 1 mask
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+
diff --git a/models/configs/v2-inference-v.yaml b/models/configs/v2-inference-v.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ec8dfbfefe94ae8522c93017668fea78d580acf
--- /dev/null
+++ b/models/configs/v2-inference-v.yaml
@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/models/configs/v2-inference-v_fp32.yaml b/models/configs/v2-inference-v_fp32.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5c9b9cb29ca162ade44a7c922f59e75d7d57813
--- /dev/null
+++ b/models/configs/v2-inference-v_fp32.yaml
@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/models/configs/v2-inference.yaml b/models/configs/v2-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..152c4f3c2b36c3b246a9cb10eb8166134b0d2e1c
--- /dev/null
+++ b/models/configs/v2-inference.yaml
@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/models/configs/v2-inference_fp32.yaml b/models/configs/v2-inference_fp32.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d03231f3f2c2e8ef8fbe0d781e5f3d65409ef3a
--- /dev/null
+++ b/models/configs/v2-inference_fp32.yaml
@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/models/configs/v2-inpainting-inference.yaml b/models/configs/v2-inpainting-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32a9471d71b828c51bcbbabfe34c5f6c8282c803
--- /dev/null
+++ b/models/configs/v2-inpainting-inference.yaml
@@ -0,0 +1,158 @@
+model:
+  base_learning_rate: 5.0e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: hybrid
+    scale_factor: 0.18215
+    monitor: val/loss_simple_ema
+    finetune_keys: null
+    use_ema: False
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        image_size: 32 # unused
+        in_channels: 9
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: [ ]
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
+
+
+data:
+  target: ldm.data.laion.WebDataModuleFromConfig
+  params:
+    tar_base: null # for concat as in LAION-A
+    p_unsafe_threshold: 0.1
+    filter_word_list: "data/filters.yaml"
+    max_pwatermark: 0.45
+    batch_size: 8
+    num_workers: 6
+    multinode: True
+    min_size: 512
+    train:
+      shards:
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
+      shuffle: 10000
+      image_key: jpg
+      image_transforms:
+      - target: torchvision.transforms.Resize
+        params:
+          size: 512
+          interpolation: 3
+      - target: torchvision.transforms.RandomCrop
+        params:
+          size: 512
+      postprocess:
+        target: ldm.data.laion.AddMask
+        params:
+          mode: "512train-large"
"512train-large" + p_drop: 0.25 + # NOTE use enough shards to avoid empty validation loops in workers + validation: + shards: + - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - " + shuffle: 0 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 512 + interpolation: 3 + - target: torchvision.transforms.CenterCrop + params: + size: 512 + postprocess: + target: ldm.data.laion.AddMask + params: + mode: "512train-large" + p_drop: 0.25 + +lightning: + find_unused_parameters: True + modelcheckpoint: + params: + every_n_train_steps: 5000 + + callbacks: + metrics_over_trainsteps_checkpoint: + params: + every_n_train_steps: 10000 + + image_logger: + target: main.ImageLogger + params: + enable_autocast: False + disabled: False + batch_frequency: 1000 + max_images: 4 + increase_log_steps: False + log_first_step: False + log_images_kwargs: + use_ema_scope: False + inpaint: False + plot_progressive_rows: False + plot_diffusion_rows: False + N: 4 + unconditional_guidance_scale: 5.0 + unconditional_guidance_label: [""] + ddim_steps: 50 # todo check these out for depth2img, + ddim_eta: 0.0 # todo check these out for depth2img, + + trainer: + benchmark: True + val_check_interval: 5000000 + num_sanity_val_steps: 0 + accumulate_grad_batches: 1 diff --git a/models/controlnet/put_controlnets_and_t2i_here b/models/controlnet/put_controlnets_and_t2i_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/diffusers/put_diffusers_models_here b/models/diffusers/put_diffusers_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/embeddings/put_embeddings_or_textual_inversion_concepts_here b/models/embeddings/put_embeddings_or_textual_inversion_concepts_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/facedetection/detection_Resnet50_Final.pth b/models/facedetection/detection_Resnet50_Final.pth new file mode 100644 index 0000000000000000000000000000000000000000..16546738ce0a00a9fd47585e0fc52744d31cc117 --- /dev/null +++ b/models/facedetection/detection_Resnet50_Final.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d +size 109497761 diff --git a/models/facedetection/parsing_bisenet.pth b/models/facedetection/parsing_bisenet.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca57f3257ca7715bc340d065764bc249d985c287 --- /dev/null +++ b/models/facedetection/parsing_bisenet.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468e13ca13a9b43cc0881a9f99083a430e9c0a38abd935431d1c28ee94b26567 +size 53289463 diff --git a/models/facedetection/parsing_parsenet.pth b/models/facedetection/parsing_parsenet.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ac2efc50360a79c9905dbac57d9d99cbfbe863c --- /dev/null +++ b/models/facedetection/parsing_parsenet.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2 +size 85331193 diff --git a/models/facerestore_models/GFPGANv1.3.pth b/models/facerestore_models/GFPGANv1.3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1da748a3ef84ff85dd2c77c836f222aae22b007e --- /dev/null +++ 
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c953a88f2727c85c3d9ae72e2bd4846bbaf59fe6972ad94130e23e7017524a70
+size 348632874
diff --git a/models/facerestore_models/GFPGANv1.4.pth b/models/facerestore_models/GFPGANv1.4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..afedb5c7e826056840c9cc183f2c6f0186fd17ba
--- /dev/null
+++ b/models/facerestore_models/GFPGANv1.4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2cd4703ab14f4d01fd1383a8a8b266f9a5833dacee8e6a79d3bf21a1b6be5ad
+size 348632874
diff --git a/models/facerestore_models/GPEN-BFR-1024.onnx b/models/facerestore_models/GPEN-BFR-1024.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c62bc9e8d12595f0b91894e12351403fa83b4776
--- /dev/null
+++ b/models/facerestore_models/GPEN-BFR-1024.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cec8892093d7b99828acde97bf231fb0964d3fb11b43f3b0951e36ef1e192a3e
+size 285101993
diff --git a/models/facerestore_models/GPEN-BFR-2048.onnx b/models/facerestore_models/GPEN-BFR-2048.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..5f96247e9d808aed49cf03142dfb27a43fc15ce0
--- /dev/null
+++ b/models/facerestore_models/GPEN-BFR-2048.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0229ff43f979c360bd19daa9cd0ce893722d59f41a41822b9223ebbe4f89b3e
+size 285469146
diff --git a/models/facerestore_models/GPEN-BFR-512.onnx b/models/facerestore_models/GPEN-BFR-512.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d2111b516504e5ae7bcaeee49192bcac9ca52690
--- /dev/null
+++ b/models/facerestore_models/GPEN-BFR-512.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf80acb8e91ba8852e3f012505be2c3b6cd6b3eed5ec605e3db87863c4e74d4e
+size 284244491
diff --git a/models/facerestore_models/codeformer-v0.1.0.pth b/models/facerestore_models/codeformer-v0.1.0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..edd450da13c5ff890f70d726c992af569813f6af
--- /dev/null
+++ b/models/facerestore_models/codeformer-v0.1.0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7
+size 376637898
diff --git a/models/facerestore_models/codeformer.pth b/models/facerestore_models/codeformer.pth
new file mode 100644
index 0000000000000000000000000000000000000000..edd450da13c5ff890f70d726c992af569813f6af
--- /dev/null
+++ b/models/facerestore_models/codeformer.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7
+size 376637898
diff --git a/models/gligen/put_gligen_models_here b/models/gligen/put_gligen_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/hypernetworks/put_hypernetworks_here b/models/hypernetworks/put_hypernetworks_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/insightface/inswapper_128.onnx b/models/insightface/inswapper_128.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cb672b799d74fdf7ab8b172a1b1d78411f6400f5
--- /dev/null
+++ b/models/insightface/inswapper_128.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
+size 554253681
diff --git a/models/insightface/models/antelopev2.zip b/models/insightface/models/antelopev2.zip
new file mode 100644
index 0000000000000000000000000000000000000000..b940dc7e26eb5353e5d5722d4cec3d513cffb0d3
--- /dev/null
+++ b/models/insightface/models/antelopev2.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e182f14fc6e80b3bfa375b33eb6cff7ee05d8ef7633e738d1c89021dcf0c5c5
+size 360662982
diff --git a/models/insightface/models/antelopev2/1k3d68.onnx b/models/insightface/models/antelopev2/1k3d68.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..221aa2f02a6faccddb2723529e1f93c7db2edbdc
--- /dev/null
+++ b/models/insightface/models/antelopev2/1k3d68.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+size 143607619
diff --git a/models/insightface/models/antelopev2/2d106det.onnx b/models/insightface/models/antelopev2/2d106det.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cdb163d88b5f51396855ebc795e0114322c98b6b
--- /dev/null
+++ b/models/insightface/models/antelopev2/2d106det.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+size 5030888
diff --git a/models/insightface/models/antelopev2/genderage.onnx b/models/insightface/models/antelopev2/genderage.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..fcf638481cea978e99ddabd914ccd3b70c8401cb
--- /dev/null
+++ b/models/insightface/models/antelopev2/genderage.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+size 1322532
diff --git a/models/insightface/models/antelopev2/glintr100.onnx b/models/insightface/models/antelopev2/glintr100.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..9d221846df998a9c85239fd74a9fe5685193775f
--- /dev/null
+++ b/models/insightface/models/antelopev2/glintr100.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ab1d6435d639628a6f3e5008dd4f929edf4c4124b1a7169e1048f9fef534cdf
+size 260665334
diff --git a/models/insightface/models/antelopev2/scrfd_10g_bnkps.onnx b/models/insightface/models/antelopev2/scrfd_10g_bnkps.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..aa586e034379fa5ea5babc8aa73d47afcd0fa6c2
--- /dev/null
+++ b/models/insightface/models/antelopev2/scrfd_10g_bnkps.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+size 16923827
diff --git a/models/insightface/models/buffalo_l/1k3d68.onnx b/models/insightface/models/buffalo_l/1k3d68.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..221aa2f02a6faccddb2723529e1f93c7db2edbdc
--- /dev/null
+++ b/models/insightface/models/buffalo_l/1k3d68.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+size 143607619
diff --git a/models/insightface/models/buffalo_l/2d106det.onnx b/models/insightface/models/buffalo_l/2d106det.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cdb163d88b5f51396855ebc795e0114322c98b6b
--- /dev/null
+++ b/models/insightface/models/buffalo_l/2d106det.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+size 5030888
diff --git a/models/insightface/models/buffalo_l/det_10g.onnx b/models/insightface/models/buffalo_l/det_10g.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..aa586e034379fa5ea5babc8aa73d47afcd0fa6c2
--- /dev/null
+++ b/models/insightface/models/buffalo_l/det_10g.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+size 16923827
diff --git a/models/insightface/models/buffalo_l/genderage.onnx b/models/insightface/models/buffalo_l/genderage.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..fcf638481cea978e99ddabd914ccd3b70c8401cb
--- /dev/null
+++ b/models/insightface/models/buffalo_l/genderage.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+size 1322532
diff --git a/models/insightface/models/buffalo_l/w600k_r50.onnx b/models/insightface/models/buffalo_l/w600k_r50.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..571d2bb9ffd76399b23260620b9101b20bcc4e99
--- /dev/null
+++ b/models/insightface/models/buffalo_l/w600k_r50.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43
+size 174383860
diff --git a/models/inspyrenet/InSPyReNet-SwinB-Plus-Ultra.pth b/models/inspyrenet/InSPyReNet-SwinB-Plus-Ultra.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b0b46444117568e9295f6bc637eed36d529fe9b3
--- /dev/null
+++ b/models/inspyrenet/InSPyReNet-SwinB-Plus-Ultra.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bea496a098c5a64a5ecd7163dc309e12734a1a084548fb66d3ad093fc4683ea
+size 193863680
diff --git a/models/loras/put_loras_here b/models/loras/put_loras_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/pulid/pulid_flux_v0.9.1.safetensors b/models/pulid/pulid_flux_v0.9.1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..185c83b328d025054c3ff7c6446dc8cb28361355
--- /dev/null
+++ b/models/pulid/pulid_flux_v0.9.1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92c41c3af322b02e58e1b32842e4601e08c8f16ec1fe80089dbe957df510f51d
+size 1142099520
diff --git a/models/rembg/isnet-anime.onnx b/models/rembg/isnet-anime.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..29dfff71d50be4ff3fe5876bf7c318b9beda16a0
--- /dev/null
+++ b/models/rembg/isnet-anime.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f15622d853e8260172812b657053460e20806f04b9e05147d49af7bed31a6e99
+size 176069933
diff --git a/models/rembg/isnet-general-use.onnx b/models/rembg/isnet-general-use.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..aae8625d60df68f7a2c7fa770814a3e6eb30612a
--- /dev/null
+++ b/models/rembg/isnet-general-use.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60920e99c45464f2ba57bee2ad08c919a52bbf852739e96947fbb4358c0d964a
+size 178648008
diff --git a/models/rembg/sam_vit_b_01ec64.decoder.onnx b/models/rembg/sam_vit_b_01ec64.decoder.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c099404a440133bc1d2315a167c4c81900cc2a41
--- /dev/null
+++ b/models/rembg/sam_vit_b_01ec64.decoder.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9820a48a2d002aead7b4dab204d813176a810a5f1dc8f32e32f759cc070ae97e
+size 16500570
diff --git a/models/rembg/sam_vit_b_01ec64.encoder.onnx b/models/rembg/sam_vit_b_01ec64.encoder.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c30dfe948a4ad0e0d492e6e3748ff2e022b2e041
--- /dev/null
+++ b/models/rembg/sam_vit_b_01ec64.encoder.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62a29c39ebc8ae4dfc3d9ec8eae28c28efc8360233fa8ffba229eb9f6ac0c30a
+size 359217310
diff --git a/models/rembg/silueta.onnx b/models/rembg/silueta.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..dc57a23a7dd97f680decaf4246d88ca16802c58d
--- /dev/null
+++ b/models/rembg/silueta.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75da6c8d2f8096ec743d071951be73b4a8bc7b3e51d9a6625d63644f90ffeedb
+size 44173029
diff --git a/models/rembg/u2net.onnx b/models/rembg/u2net.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d5e2c4d942dc1e3d0a5cc5b194516e9ddd70a3ed
--- /dev/null
+++ b/models/rembg/u2net.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d10d2f3bb75ae3b6d527c77944fc5e7dcd94b29809d47a739a7a728a912b491
+size 175997641
diff --git a/models/rembg/u2netp.onnx b/models/rembg/u2netp.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3d15c8c667182a34f490f4e53633c3e991c20a18
--- /dev/null
+++ b/models/rembg/u2netp.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:309c8469258dda742793dce0ebea8e6dd393174f89934733ecc8b14c76f4ddd8
+size 4574861
diff --git a/models/roop/inswapper_128.onnx b/models/roop/inswapper_128.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cb672b799d74fdf7ab8b172a1b1d78411f6400f5
--- /dev/null
+++ b/models/roop/inswapper_128.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
+size 554253681
diff --git a/models/style_models/put_t2i_style_model_here b/models/style_models/put_t2i_style_model_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/ultralytics/bbox/face_yolov8m.pt b/models/ultralytics/bbox/face_yolov8m.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3581945a1f3342c5c48d0b7b339b56dd1676008a
--- /dev/null
+++ b/models/ultralytics/bbox/face_yolov8m.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f02b8a23e6f12bd2c1b1f6714f66f984c728fa41ed749d033e7d6dea511ef70c
+size 52026019
diff --git a/models/ultralytics/bbox/hand_yolov8s.pt b/models/ultralytics/bbox/hand_yolov8s.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21091d538a48b1afd5e9910e2d3863a1d4974799
--- /dev/null
+++ b/models/ultralytics/bbox/hand_yolov8s.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c4faf8d17286ace2c3d3346c6d0d4a0c8d62404955263a7ae95c1dd7eb877af
+size 22507707
diff --git a/models/ultralytics/segm/person_yolov8m-seg.pt b/models/ultralytics/segm/person_yolov8m-seg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ae56d795a72bb6b19938117adf3c50ee70c21fb
--- /dev/null
+++ b/models/ultralytics/segm/person_yolov8m-seg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d881ec50b831f546e37977081b18f4e3bf65664aec163f97a311b0955499795
+size 54827683
diff --git a/models/unet/put_unet_files_here b/models/unet/put_unet_files_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/upscale_models/4x-AnimeSharp.pth b/models/upscale_models/4x-AnimeSharp.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6b0c258568c442d8a1ea6a48b71096a314cd7211
--- /dev/null
+++ b/models/upscale_models/4x-AnimeSharp.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a7de2dafd7331c1992862bbbcd9e9712a9f9f8e6303f0aaa59b4341d359bab
+size 67010245
diff --git a/models/upscale_models/4x-UltraSharp.pth b/models/upscale_models/4x-UltraSharp.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f3bb839bebd6cd26c94122b7651261d0b346a50
--- /dev/null
+++ b/models/upscale_models/4x-UltraSharp.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5812231fc936b42af08a5edba784195495d303d5b3248c24489ef0c4021fe01
+size 66961958
diff --git a/models/upscale_models/4xUltrasharp_4xUltrasharpV10.pt b/models/upscale_models/4xUltrasharp_4xUltrasharpV10.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f3bb839bebd6cd26c94122b7651261d0b346a50
--- /dev/null
+++ b/models/upscale_models/4xUltrasharp_4xUltrasharpV10.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5812231fc936b42af08a5edba784195495d303d5b3248c24489ef0c4021fe01
+size 66961958
diff --git a/models/upscale_models/8x_NMKD-Superscale_150000_G.pth b/models/upscale_models/8x_NMKD-Superscale_150000_G.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a3410efafa0ac437ae071cc7ac24ae8c8d2b7fa4
--- /dev/null
+++ b/models/upscale_models/8x_NMKD-Superscale_150000_G.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fb44a906b9bc4dd89d6bf8d1d28e6cc59cc58c2a28251aa88c85a38e72c8507
+size 67106707
diff --git a/models/upscale_models/RealESRGAN_x2.pth b/models/upscale_models/RealESRGAN_x2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..313b87ab9359a04b0f450695b1a01a88edd4ac95
--- /dev/null
+++ b/models/upscale_models/RealESRGAN_x2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c830d067d54fc767b9543a8432f36d91bc2de313584e8bbfe4ac26a47339e899
+size 67061725
diff --git a/models/upscale_models/RealESRGAN_x4plus.pth b/models/upscale_models/RealESRGAN_x4plus.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9ddced536d07803300536317fef662bb499bca71
--- /dev/null
+++ b/models/upscale_models/RealESRGAN_x4plus.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa0d38905f75ac06eb49a7951b426670021be3018265fd191d2125df9d682f1
+size 67040989
diff --git a/models/upscale_models/ldsr/last.ckpt b/models/upscale_models/ldsr/last.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..65db55d23c050127d50d81b9341aefd106e52e79
--- /dev/null
+++ b/models/upscale_models/ldsr/last.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c209caecac2f97b4bb8f4d726b70ac2ac9b35904b7fc99801e1f5e61f9210c13
+size 2039822129
diff --git a/models/upscale_models/put_esrgan_and_other_upscale_models_here b/models/upscale_models/put_esrgan_and_other_upscale_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/vae/ae.safetensors b/models/vae/ae.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71f11a92800c4a93cead7cebc556531926ecfc33
--- /dev/null
+++ b/models/vae/ae.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
+size 335304388
diff --git a/models/vae/put_vae_here b/models/vae/put_vae_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here b/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391