diff --git a/audio_encoders/put_audio_encoder_models_here b/audio_encoders/put_audio_encoder_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/checkpoints/put_checkpoints_here b/checkpoints/put_checkpoints_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/clip/put_clip_or_text_encoder_models_here b/clip/put_clip_or_text_encoder_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/clip/umt5_xxl_fp16.safetensors b/clip/umt5_xxl_fp16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9aa640a3a4ba42fcb2baa7c929a0e0ec9cdc9968
--- /dev/null
+++ b/clip/umt5_xxl_fp16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b8850f1961e1cf8a77cca4c964a358d303f490833c6c087d0cff4b2f99db2af
+size 11366399385
diff --git a/clip_vision/clip_vision_h.safetensors b/clip_vision/clip_vision_h.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0451a9fda3873d0db8725fcea14e5660e1793f83
--- /dev/null
+++ b/clip_vision/clip_vision_h.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64a7ef761bfccbadbaa3da77366aac4185a6c58fa5de5f589b42a65bcc21f161
+size 1264219396
diff --git a/clip_vision/put_clip_vision_models_here b/clip_vision/put_clip_vision_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
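Every binary model in this commit is stored as a Git LFS pointer like the ones above: the repository tracks only the version line, the sha256 oid, and the byte size, while the weights themselves live in LFS storage. A minimal sketch (Python; the local path is illustrative) for checking that a fetched file actually matches its pointer:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so multi-GB checkpoints don't need to fit in RAM.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the clip/umt5_xxl_fp16.safetensors pointer above
expected = "7b8850f1961e1cf8a77cca4c964a358d303f490833c6c087d0cff4b2f99db2af"
actual = sha256_of("clip/umt5_xxl_fp16.safetensors")
assert actual == expected, f"hash mismatch: {actual}"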
diff --git a/configs/anything_v3.yaml b/configs/anything_v3.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34
--- /dev/null
+++ b/configs/anything_v3.yaml
@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
diff --git a/configs/v1-inference.yaml b/configs/v1-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4effe569e897369918625f9d8be5603a0e6a0d6
--- /dev/null
+++ b/configs/v1-inference.yaml
@@ -0,0 +1,70 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
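These YAML files follow the CompVis/Stability ldm convention: every target: names a class and the sibling params: block holds its constructor arguments. A rough sketch of how such a config becomes a model, assuming omegaconf and the ldm package are importable (this mirrors ldm.util.instantiate_from_config):

import importlib

from omegaconf import OmegaConf

def instantiate_from_config(config):
    # "a.b.ClassName" -> import a.b, then call ClassName(**params)
    module, cls = config["target"].rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)(**config.get("params", dict()))

config = OmegaConf.load("configs/v1-inference.yaml")
model = instantiate_from_config(config.model)  # LatentDiffusion, randomly initialized
# weights are then loaded from a checkpoint via model.load_state_dict(...)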
diff --git a/configs/v1-inference_clip_skip_2.yaml b/configs/v1-inference_clip_skip_2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34
--- /dev/null
+++ b/configs/v1-inference_clip_skip_2.yaml
@@ -0,0 +1,73 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
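The only difference from v1-inference.yaml is the cond_stage_config params: layer: "hidden" with layer_idx: -2 makes the frozen CLIP encoder return its penultimate hidden state, i.e. "clip skip 2". Sketched against the Hugging Face transformers CLIP text model (model id and prompt are placeholders):

import torch
from transformers import CLIPTextModel, CLIPTokenizer

name = "openai/clip-vit-large-patch14"
tokenizer = CLIPTokenizer.from_pretrained(name)
text_model = CLIPTextModel.from_pretrained(name).eval()

tokens = tokenizer(["a photo of a cat"], padding="max_length", max_length=77,
                   truncation=True, return_tensors="pt")
with torch.no_grad():
    out = text_model(input_ids=tokens.input_ids, output_hidden_states=True)

cond = out.hidden_states[-2]  # layer_idx: -2 -> penultimate layer, shape [1, 77, 768]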
diff --git a/configs/v1-inference_clip_skip_2_fp16.yaml b/configs/v1-inference_clip_skip_2_fp16.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7eca31c7b5e571c2b1348e94ed9d69978ebd2d52
--- /dev/null
+++ b/configs/v1-inference_clip_skip_2_fp16.yaml
@@ -0,0 +1,74 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+      params:
+        layer: "hidden"
+        layer_idx: -2
diff --git a/configs/v1-inference_fp16.yaml b/configs/v1-inference_fp16.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..147f42b17b835cc839338156f99e8f971df5c1aa
--- /dev/null
+++ b/configs/v1-inference_fp16.yaml
@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 1.0e-04
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 10000 ]
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45f3f82d461cd8c6109f26ec3b1da75366eda0b0
--- /dev/null
+++ b/configs/v1-inpainting-inference.yaml
@@ -0,0 +1,71 @@
+model:
+  base_learning_rate: 7.5e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false # Note: different from the one we trained before
+    conditioning_key: hybrid # important
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    finetune_keys: null
+
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
+        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+        f_start: [ 1.e-6 ]
+        f_max: [ 1. ]
+        f_min: [ 1. ]
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: 32 # unused
+        in_channels: 9 # 4 data + 4 downscaled image + 1 mask
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+
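The inpainting UNet's in_channels: 9 (together with conditioning_key: hybrid) means the denoiser input is the noised latent concatenated channel-wise with the conditioning; per the config comment the 9 channels are 4 latent + 4 encoded masked image + 1 downscaled mask, though the exact concat order is an implementation detail of LatentInpaintDiffusion. Dummy-tensor sketch:

import torch

b = 1
noisy_latent = torch.randn(b, 4, 64, 64)         # x_t ("4 data")
masked_image_latent = torch.randn(b, 4, 64, 64)  # VAE encoding of image * (1 - mask)
mask = torch.rand(b, 1, 64, 64)                  # mask resized to latent resolution

unet_input = torch.cat([noisy_latent, masked_image_latent, mask], dim=1)
assert unet_input.shape[1] == 9  # matches in_channels above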
diff --git a/configs/v2-inference-v.yaml b/configs/v2-inference-v.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ec8dfbfefe94ae8522c93017668fea78d580acf
--- /dev/null
+++ b/configs/v2-inference-v.yaml
@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/configs/v2-inference-v_fp32.yaml b/configs/v2-inference-v_fp32.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5c9b9cb29ca162ade44a7c922f59e75d7d57813
--- /dev/null
+++ b/configs/v2-inference-v_fp32.yaml
@@ -0,0 +1,68 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    parameterization: "v"
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
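parameterization: "v" is what separates these two files from the plain v2-inference configs: the UNet predicts v = sqrt(alpha_bar) * eps - sqrt(1 - alpha_bar) * x0 rather than the noise eps itself. A self-checking sketch of the algebra with dummy tensors:

import torch

alpha_bar = torch.tensor(0.7)                 # cumulative alpha at some timestep
a, s = alpha_bar.sqrt(), (1.0 - alpha_bar).sqrt()

x0 = torch.randn(4, 64, 64)                   # clean latent
eps = torch.randn(4, 64, 64)                  # noise
x_t = a * x0 + s * eps                        # forward diffusion
v = a * eps - s * x0                          # target a v-model is trained on

x0_hat = a * x_t - s * v                      # sampler's recovery of x0 from v
assert torch.allclose(x0_hat, x0, atol=1e-5)  # holds because a**2 + s**2 == 1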
diff --git a/configs/v2-inference.yaml b/configs/v2-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..152c4f3c2b36c3b246a9cb10eb8166134b0d2e1c
--- /dev/null
+++ b/configs/v2-inference.yaml
@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
diff --git a/configs/v2-inference_fp32.yaml b/configs/v2-inference_fp32.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d03231f3f2c2e8ef8fbe0d781e5f3d65409ef3a
--- /dev/null
+++ b/configs/v2-inference_fp32.yaml
@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: False
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
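All of these configs share scale_factor: 0.18215: first-stage latents are multiplied by it after encoding and divided by it before decoding, so the diffusion model sees roughly unit-variance inputs. Sketch, assuming vae is the AutoencoderKL built from first_stage_config:

scale_factor = 0.18215

def encode_to_latent(vae, image):
    # ldm's AutoencoderKL.encode returns a DiagonalGaussianDistribution
    return scale_factor * vae.encode(image).sample()

def decode_from_latent(vae, z):
    return vae.decode(z / scale_factor)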
diff --git a/configs/v2-inpainting-inference.yaml b/configs/v2-inpainting-inference.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32a9471d71b828c51bcbbabfe34c5f6c8282c803
--- /dev/null
+++ b/configs/v2-inpainting-inference.yaml
@@ -0,0 +1,158 @@
+model:
+  base_learning_rate: 5.0e-05
+  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: hybrid
+    scale_factor: 0.18215
+    monitor: val/loss_simple_ema
+    finetune_keys: null
+    use_ema: False
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        image_size: 32 # unused
+        in_channels: 9
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: [ ]
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
+
+
+data:
+  target: ldm.data.laion.WebDataModuleFromConfig
+  params:
+    tar_base: null # for concat as in LAION-A
+    p_unsafe_threshold: 0.1
+    filter_word_list: "data/filters.yaml"
+    max_pwatermark: 0.45
+    batch_size: 8
+    num_workers: 6
+    multinode: True
+    min_size: 512
+    train:
+      shards:
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
+        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
+      shuffle: 10000
+      image_key: jpg
+      image_transforms:
+      - target: torchvision.transforms.Resize
+        params:
+          size: 512
+          interpolation: 3
+      - target: torchvision.transforms.RandomCrop
+        params:
+          size: 512
+      postprocess:
+        target: ldm.data.laion.AddMask
+        params:
+          mode: "512train-large"
+          p_drop: 0.25
+    # NOTE use enough shards to avoid empty validation loops in workers
+    validation:
+      shards:
+        - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
+      shuffle: 0
+      image_key: jpg
+      image_transforms:
+      - target: torchvision.transforms.Resize
+        params:
+          size: 512
+          interpolation: 3
+      - target: torchvision.transforms.CenterCrop
+        params:
+          size: 512
+      postprocess:
+        target: ldm.data.laion.AddMask
+        params:
+          mode: "512train-large"
+          p_drop: 0.25
+
+lightning:
+  find_unused_parameters: True
+  modelcheckpoint:
+    params:
+      every_n_train_steps: 5000
+
+  callbacks:
+    metrics_over_trainsteps_checkpoint:
+      params:
+        every_n_train_steps: 10000
+
+    image_logger:
+      target: main.ImageLogger
+      params:
+        enable_autocast: False
+        disabled: False
+        batch_frequency: 1000
+        max_images: 4
+        increase_log_steps: False
+        log_first_step: False
+        log_images_kwargs:
+          use_ema_scope: False
+          inpaint: False
+          plot_progressive_rows: False
+          plot_diffusion_rows: False
+          N: 4
+          unconditional_guidance_scale: 5.0
+          unconditional_guidance_label: [""]
+          ddim_steps: 50 # todo check these out for depth2img,
+          ddim_eta: 0.0 # todo check these out for depth2img,
+
+  trainer:
+    benchmark: True
+    val_check_interval: 5000000
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
diff --git a/controlnet/put_controlnets_and_t2i_here b/controlnet/put_controlnets_and_t2i_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/detection/vitpose_h_wholebody_data.bin b/detection/vitpose_h_wholebody_data.bin
new file mode 100644
index 0000000000000000000000000000000000000000..309a258c01a80402762b1f57208b90c2f2803ae6
--- /dev/null
+++ b/detection/vitpose_h_wholebody_data.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6a9e7cb3a87ed65a098b096029e70150408acfafc3d695019a66b289d7719e1
+size 2548958740
diff --git a/detection/vitpose_h_wholebody_model.onnx b/detection/vitpose_h_wholebody_model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..26819d54d1113adb18c975435f4c2104094802cd
--- /dev/null
+++ b/detection/vitpose_h_wholebody_model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21466cd6c93d0066782ad5923c14a4e6569133def212dc2895c73596c2e553b
+size 420252
diff --git a/detection/yolov10m.onnx b/detection/yolov10m.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b781a65ff19ca23d2db19972c662f71104a4ecc8
--- /dev/null
+++ b/detection/yolov10m.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89b526498a6d55f869a6ab52e3a2eb20ad45b3711c1f7de3dd9ca0b399dfd6d7
+size 61659339
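The ONNX detectors under detection/ can be exercised with onnxruntime; the 640x640 NCHW float input assumed below is typical for YOLO exports but is not stated by the diff, so inspect the session's inputs first:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("detection/yolov10m.onnx",
                               providers=["CPUExecutionProvider"])
inp = session.get_inputs()[0]
print(inp.name, inp.shape)                     # confirm the real input spec

image = np.random.rand(1, 3, 640, 640).astype(np.float32)  # dummy frame
outputs = session.run(None, {inp.name: image})
print([o.shape for o in outputs])              # raw detections; layout is export-specific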
diff --git a/diffusers/put_diffusers_models_here b/diffusers/put_diffusers_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/diffusion_models/put_diffusion_model_files_here b/diffusion_models/put_diffusion_model_files_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/diffusion_models/wan2.2_animate_14B_bf16.safetensors b/diffusion_models/wan2.2_animate_14B_bf16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..77ddd725cd59a9b6efe7c269a8ab207825cc7be9
--- /dev/null
+++ b/diffusion_models/wan2.2_animate_14B_bf16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d37cb0120488dec2a061135aa3018426925318a60838c3de935c829abc88667
+size 34549787368
diff --git a/diffusion_models/wan2.2_t2v_low_noise_14B_fp16.safetensors b/diffusion_models/wan2.2_t2v_low_noise_14B_fp16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b9aae7cbf1245a23edc3a2a03f32e2ae43c272c
--- /dev/null
+++ b/diffusion_models/wan2.2_t2v_low_noise_14B_fp16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431d1613ffa809ae1f735b661a01788c6d74991f51efd01f45d5aee955ccd224
+size 28577095592
diff --git a/embeddings/put_embeddings_or_textual_inversion_concepts_here b/embeddings/put_embeddings_or_textual_inversion_concepts_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/gligen/put_gligen_models_here b/gligen/put_gligen_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hypernetworks/put_hypernetworks_here b/hypernetworks/put_hypernetworks_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/latent_upscale_models/put_latent_upscale_models_here b/latent_upscale_models/put_latent_upscale_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/loras/lightx2v_I2V_14B_480p_cfg_step_distill_rank256_bf16.safetensors b/loras/lightx2v_I2V_14B_480p_cfg_step_distill_rank256_bf16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ce8dfe3127d8fd1080e3b1e8fb37793b84ba0245
--- /dev/null
+++ b/loras/lightx2v_I2V_14B_480p_cfg_step_distill_rank256_bf16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f974aced60f1d833eca0f5d6851346fbb6eacc307c6371555b565f5617fe3859
+size 2923503944
diff --git a/loras/lightx2v_elite_it2v_animate_face.safetensors b/loras/lightx2v_elite_it2v_animate_face.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..414d9578ae2aba7230710a66091f9107c82ea92e
--- /dev/null
+++ b/loras/lightx2v_elite_it2v_animate_face.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d85056b5a2df1f211a42c232ce71c0c23636313c84460df21765eee211cb518
+size 3257907064
diff --git a/loras/put_loras_here b/loras/put_loras_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/loras/wan2.2_animate_14B_relight_lora_bf16.safetensors b/loras/wan2.2_animate_14B_relight_lora_bf16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0d09b9692f7a7f27bff2682d98e2558a2d44d23
--- /dev/null
+++ b/loras/wan2.2_animate_14B_relight_lora_bf16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f4b6b9d3bc745a86e7bfd511f3880d90cf59c3bded9584d92c028f583fa74a3
+size 1436673432
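A quick way to see what a LoRA such as wan2.2_animate_14B_relight_lora_bf16.safetensors patches is to read just the safetensors header; safe_open lists keys and shapes without loading the ~1.4 GB of tensors (the lora_A/lora_B key naming below is an expectation, not something the diff guarantees):

from safetensors import safe_open

path = "loras/wan2.2_animate_14B_relight_lora_bf16.safetensors"
with safe_open(path, framework="pt", device="cpu") as f:
    for key in sorted(f.keys())[:8]:           # e.g. ...lora_A / ...lora_B pairs
        print(key, f.get_slice(key).get_shape())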
diff --git a/model_patches/put_model_patches_here b/model_patches/put_model_patches_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/photomaker/put_photomaker_models_here b/photomaker/put_photomaker_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/sam2/.cache/huggingface/.gitignore b/sam2/.cache/huggingface/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f59ec20aabf5842d237244ece8c81ab184faeac1
--- /dev/null
+++ b/sam2/.cache/huggingface/.gitignore
@@ -0,0 +1 @@
+*
\ No newline at end of file
diff --git a/sam2/.cache/huggingface/download/sam2.1_hiera_base_plus-fp16.safetensors.lock b/sam2/.cache/huggingface/download/sam2.1_hiera_base_plus-fp16.safetensors.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/sam2/.cache/huggingface/download/sam2.1_hiera_base_plus-fp16.safetensors.metadata b/sam2/.cache/huggingface/download/sam2.1_hiera_base_plus-fp16.safetensors.metadata
new file mode 100644
index 0000000000000000000000000000000000000000..d7618916a28c1414a688fd64bd3950a8016daf20
--- /dev/null
+++ b/sam2/.cache/huggingface/download/sam2.1_hiera_base_plus-fp16.safetensors.metadata
@@ -0,0 +1,3 @@
+f885607d88bb3f9145efa49c3e3c50a9e5bf13eb
+a2693628452963a5f17e73a70a90b5faa112109307c828dec36e5fb407061005
+1775155236.8529332
diff --git a/sam2/sam2.1_hiera_base_plus-fp16.safetensors b/sam2/sam2.1_hiera_base_plus-fp16.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f30b72892eacb5500ab87cfc2c3308bc5f03a70
--- /dev/null
+++ b/sam2/sam2.1_hiera_base_plus-fp16.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2693628452963a5f17e73a70a90b5faa112109307c828dec36e5fb407061005
+size 161773292
diff --git a/sams/sam_vit_b_01ec64.pth b/sams/sam_vit_b_01ec64.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ab7d111e57bd052a76fe669986560e3555e9c8f6
--- /dev/null
+++ b/sams/sam_vit_b_01ec64.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
+size 375042383
diff --git a/sams/sam_vit_l_0b3195.pth b/sams/sam_vit_l_0b3195.pth
new file mode 100644
index 0000000000000000000000000000000000000000..87a638d6b789dd2b10fc7414a88dacc34a50769a
--- /dev/null
+++ b/sams/sam_vit_l_0b3195.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3adcc4315b642a4d2101128f611684e8734c41232a17c648ed1693702a49a622
+size 1249524607
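The sams/ checkpoints are the original Segment Anything weights; a minimal loading sketch with Meta's segment-anything package, where the registry key must match the variant encoded in the filename (vit_b for sam_vit_b_01ec64.pth, vit_l for sam_vit_l_0b3195.pth):

import numpy as np
from segment_anything import SamPredictor, sam_model_registry

sam = sam_model_registry["vit_b"](checkpoint="sams/sam_vit_b_01ec64.pth")
predictor = SamPredictor(sam)

image = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in for a real RGB image
predictor.set_image(image)
masks, scores, _ = predictor.predict(point_coords=np.array([[256, 256]]),
                                     point_labels=np.array([1]))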
diff --git a/style_models/put_t2i_style_model_here b/style_models/put_t2i_style_model_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/text_encoders/put_text_encoder_files_here b/text_encoders/put_text_encoder_files_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ultralytics/bbox/face_yolov8m.pt b/ultralytics/bbox/face_yolov8m.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbfa5813f1ecf8c0b80c12fc5951a706afdeaf30
--- /dev/null
+++ b/ultralytics/bbox/face_yolov8m.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3893a92c5c1907136b6cc75404094db767c1e0cfefe1b43e87dad72af2e4c9f
+size 51996128
diff --git a/ultralytics/bbox/hand_yolov8n.pt b/ultralytics/bbox/hand_yolov8n.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2ba2d9d01df9bd230fd4a110de7e0ce06875463
--- /dev/null
+++ b/ultralytics/bbox/hand_yolov8n.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29
+size 6237883
diff --git a/ultralytics/bbox/nails_seg_s_yolov8_v1.pt b/ultralytics/bbox/nails_seg_s_yolov8_v1.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2412ddd16f6f8102ce8e7b26a62e2eadb10c2c8
--- /dev/null
+++ b/ultralytics/bbox/nails_seg_s_yolov8_v1.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99b7d1c6ceb4bde32d80fe7ae8c8eb809c27d99b55cf9db54b6692afe68f4070
+size 23860899
diff --git a/ultralytics/bbox/nipple.pt b/ultralytics/bbox/nipple.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9884b138d6134a048a9b801bbb6cd9ad2a75a2a0
--- /dev/null
+++ b/ultralytics/bbox/nipple.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67e04f8d23cb3e56f94a2b892657e500184a9cceabd7cfad2adeb313c19c5b5c
+size 36613559
diff --git a/ultralytics/bbox/phone_v01.pt b/ultralytics/bbox/phone_v01.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30f8a9e8c0352ff1a20f84fdd410e1e733f45000
--- /dev/null
+++ b/ultralytics/bbox/phone_v01.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c69b2060fcbd530665cc20be3f95dee4762359d71906e5599e6816c7bc30ccf1
+size 6328174
diff --git a/ultralytics/bbox/pussy.pt b/ultralytics/bbox/pussy.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c885b229320bd28cec3ece88038efccd1544581
--- /dev/null
+++ b/ultralytics/bbox/pussy.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431f9bb029ca8a06905054cd56659268e165b6f98651fd651fd23d4dc4408a88
+size 6241198
diff --git a/ultralytics/bbox/yolov8m.pt b/ultralytics/bbox/yolov8m.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eea29123e0ae14239bf731ef2c912b8afa2cbd14
--- /dev/null
+++ b/ultralytics/bbox/yolov8m.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d4a90cdc7a21786cc59cd19778e9eafff836df9e2da32524737c7ee6efe4fe5
+size 52136884
diff --git a/ultralytics/bbox/yolov8n-pose.pt b/ultralytics/bbox/yolov8n-pose.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc480842e923acaa94cec02a5335edbef298a0e1
--- /dev/null
+++ b/ultralytics/bbox/yolov8n-pose.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6fa93dd1ee4a2c18c900a45c1d864a1c6f7aba75d84f91648a30b7fb641d212
+size 6832633
diff --git a/ultralytics/bbox/yolov8n.pt b/ultralytics/bbox/yolov8n.pt
new file mode 100644
index 0000000000000000000000000000000000000000..719e6f1dbdfe7c560e5933fc8b0c5a7e857d0234
--- /dev/null
+++ b/ultralytics/bbox/yolov8n.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796
diff --git a/ultralytics/segm/person_yolov8m-seg.pt b/ultralytics/segm/person_yolov8m-seg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b072e035e0c72e7b62767f4ba6d53be332da820
--- /dev/null
+++ b/ultralytics/segm/person_yolov8m-seg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ab26f517173b1fe8342d336a09f443eb61cb08dcbfc78d53fff4c2547ae81e
+size 54827683
diff --git a/ultralytics/segm/yolov8m-seg.pt b/ultralytics/segm/yolov8m-seg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9103e1eb76addbd7cf20ac60170e315f5a6eccd
--- /dev/null
+++ b/ultralytics/segm/yolov8m-seg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51fa7e5ef385efa6d5b1d8e31b73399be6ed5d7ca71bda4bd4b2794bb445c4f4
+size 54921020
diff --git a/ultralytics/segm/yolov8n-seg.pt b/ultralytics/segm/yolov8n-seg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a24abfad237ab25fef7484a351fd667b9e1e601
--- /dev/null
+++ b/ultralytics/segm/yolov8n-seg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7cd8f929e1903d78a12a48efecab430209f18dc46cb96c3599a5980c63c423c
+size 7071756
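The bbox/ vs segm/ split mirrors the layout used by ADetailer and the ComfyUI Impact Pack for ultralytics detectors. Usage sketch with the ultralytics package (input path and confidence threshold are illustrative):

from ultralytics import YOLO

model = YOLO("ultralytics/bbox/face_yolov8m.pt")
results = model("input.png", conf=0.3)         # input.png is a placeholder path

for box in results[0].boxes:                   # one Results object per input image
    print(box.xyxy.tolist(), float(box.conf))  # corner coordinates + confidence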
diff --git a/unet/put_unet_files_here b/unet/put_unet_files_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/upscale_models/1xSkinContrast-High-SuperUltraCompact.pth b/upscale_models/1xSkinContrast-High-SuperUltraCompact.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a4e6fa7b6974d6fa8568094386d9eaaf1d48bc8
--- /dev/null
+++ b/upscale_models/1xSkinContrast-High-SuperUltraCompact.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bbea62563203f099ea1cf60ce483ac60d22eb93d30f11be4cb1a229e3585e9e
+size 181474
diff --git a/upscale_models/put_esrgan_and_other_upscale_models_here b/upscale_models/put_esrgan_and_other_upscale_models_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/vae/put_vae_here b/vae/put_vae_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/vae/wan_2.1_vae.safetensors b/vae/wan_2.1_vae.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5ff32b52dac309662138ca42604696d34eaad7b8
--- /dev/null
+++ b/vae/wan_2.1_vae.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fc39d31359a4b0a64f55876d8ff7fa8d780956ae2cb13463b0223e15148976b
+size 253815318
diff --git a/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here b/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391