Upload folder using huggingface_hub (#1)
Browse files- 9d22a15de7fef0b364ed5bbee60bd3030b98433e78ba3db40dc101500c739431 (913dcf00f6aaf06c692a06a181c4a70ec306ba4a)
- 3db518073bcd5fba8efdb48022097e8bd47dbdbe2b0c68ad7fb5c43ff25da73a (8531ecc3155cef09b47c46be6ad655b2349be53e)
- f1b13e78d5bf760c9158f576d92b625833e838cde93d25f2328aae021d4a9db3 (b624aa55eeaab2d199f27d537798b1f5a0341e42)
- 2f115a8213e4e0c00008b3a756061be36bc19bcb27a1dbdd059d4d5644bd159b (b53e3cae6c7313ec79e42e7bb381af95647cd227)
- cef5a5ce8347bf8dd29b3df76c36775c4b65dfe24a713b330ba1564167b48e9c (816bdccdb7025c1144bb3675c978fcf4ed46d9a0)
- a5855e53c138b614e9083630e10f210110c6d707c0c78ab352ff95a0f5bad7c0 (16a25e795dbb03aaf6a8f86a55c33cd958ca52e6)
- 94b94c1f9dbe461aac261f44115631d1f325954bf370602f2ae7ac3bc1516cae (8d10e6592f307f378c91fd4acfbbf6e8a85cfe66)
- b902b608589867b43e67269003703e7083e6a4a63bb7a1e5c57d10b7f87fa3f6 (d0e7ee94bfdae5e4dbe98cf38dd21fe5b6c4296e)
- 37adc4fdf899b8df2d9ed4e081c411212868d379853a5fc887cd286a75bc711e (7e12d1ec989ecd7d642b754e7f7b76fb6b3b3814)
- 0652de142599331a9a236b5ba3b64ac4993b09b78cd9c2c678b1164d1e34a4d6 (1c6775aa47502c1cb486b7263c5ab1735a660729)
- mrnx2024-06-23_03-36-58-save-245-35-0.safetensors +3 -0
- mrnx2024-06-23_03-36-58-save-245-35-0.yaml +100 -0
- mrnx2024-06-23_03-44-33-save-490-70-0.safetensors +3 -0
- mrnx2024-06-23_03-44-33-save-490-70-0.yaml +100 -0
- mrnx2024-06-23_03-52-26-save-735-105-0.safetensors +3 -0
- mrnx2024-06-23_03-52-26-save-735-105-0.yaml +100 -0
- mrnx2024-06-23_04-00-21-save-980-140-0.safetensors +3 -0
- mrnx2024-06-23_04-00-21-save-980-140-0.yaml +100 -0
- mrnx2024-06-23_04-08-18-save-1225-175-0.safetensors +3 -0
- mrnx2024-06-23_04-08-18-save-1225-175-0.yaml +100 -0
- mrnx2024-06-23_04-16-12-save-1470-210-0.safetensors +3 -0
- mrnx2024-06-23_04-16-12-save-1470-210-0.yaml +100 -0
- mrnx2024-06-23_04-24-07-save-1715-245-0.safetensors +3 -0
- mrnx2024-06-23_04-24-07-save-1715-245-0.yaml +100 -0
- mrnx2024-06-23_04-32-05-save-1960-280-0.safetensors +3 -0
- mrnx2024-06-23_04-32-05-save-1960-280-0.yaml +100 -0
- mrnx2024-06-23_04-40-03-save-2205-315-0.safetensors +3 -0
- mrnx2024-06-23_04-40-03-save-2205-315-0.yaml +100 -0
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:005513975873e15f28feb79c07c77b83811ec4e215c4d4e711b2ab16ec3db9ca
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02c9236748e04330a4668d127381e00a885eab7474bf4f9629137cce71d003bb
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9722df53758d03690b866f7b65e3a35894799d07764891d50ba92affddd4cc48
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e207f485a40ffc4c66f3564bcc40a7799d507bc8aa271aed9a330ca31c0f2ec3
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5b2cc1e6ceabc06f609d6716613ce2ee8c3c725e2490e69c17d4f74bccf31f0
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35dbe6e59be1e77ba58b433b895421f0366b018ad3221c1315bfc75ffeeb6de8
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6db26e273e03304186e6c86e11c9f4660686f984de6021c4294830342426f19
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62bc7f96625f632c1c4d967e2deb378fdcf1d5c42688bcfe9fd9f85fe65519f0
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd07af660c3e74a1a63afe82ccd492b36297d45f41fa90bddaa1ac8451ec8a32
|
| 3 |
+
size 6938084280
|
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
params:
|
| 3 |
+
conditioner_config:
|
| 4 |
+
params:
|
| 5 |
+
emb_models:
|
| 6 |
+
- input_key: txt
|
| 7 |
+
is_trainable: false
|
| 8 |
+
params:
|
| 9 |
+
layer: hidden
|
| 10 |
+
layer_idx: 11
|
| 11 |
+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 12 |
+
- input_key: txt
|
| 13 |
+
is_trainable: false
|
| 14 |
+
params:
|
| 15 |
+
always_return_pooled: true
|
| 16 |
+
arch: ViT-bigG-14
|
| 17 |
+
freeze: true
|
| 18 |
+
layer: penultimate
|
| 19 |
+
legacy: false
|
| 20 |
+
version: laion2b_s39b_b160k
|
| 21 |
+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
|
| 22 |
+
- input_key: original_size_as_tuple
|
| 23 |
+
is_trainable: false
|
| 24 |
+
params:
|
| 25 |
+
outdim: 256
|
| 26 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 27 |
+
- input_key: crop_coords_top_left
|
| 28 |
+
is_trainable: false
|
| 29 |
+
params:
|
| 30 |
+
outdim: 256
|
| 31 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 32 |
+
- input_key: target_size_as_tuple
|
| 33 |
+
is_trainable: false
|
| 34 |
+
params:
|
| 35 |
+
outdim: 256
|
| 36 |
+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
|
| 37 |
+
target: sgm.modules.GeneralConditioner
|
| 38 |
+
denoiser_config:
|
| 39 |
+
params:
|
| 40 |
+
discretization_config:
|
| 41 |
+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
|
| 42 |
+
num_idx: 1000
|
| 43 |
+
scaling_config:
|
| 44 |
+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
|
| 45 |
+
weighting_config:
|
| 46 |
+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
|
| 47 |
+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
|
| 48 |
+
disable_first_stage_autocast: true
|
| 49 |
+
first_stage_config:
|
| 50 |
+
params:
|
| 51 |
+
ddconfig:
|
| 52 |
+
attn_resolutions: []
|
| 53 |
+
attn_type: vanilla-xformers
|
| 54 |
+
ch: 128
|
| 55 |
+
ch_mult:
|
| 56 |
+
- 1
|
| 57 |
+
- 2
|
| 58 |
+
- 4
|
| 59 |
+
- 4
|
| 60 |
+
double_z: true
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
in_channels: 3
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
out_ch: 3
|
| 65 |
+
resolution: 256
|
| 66 |
+
z_channels: 4
|
| 67 |
+
embed_dim: 4
|
| 68 |
+
lossconfig:
|
| 69 |
+
target: torch.nn.Identity
|
| 70 |
+
monitor: val/rec_loss
|
| 71 |
+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
|
| 72 |
+
network_config:
|
| 73 |
+
params:
|
| 74 |
+
adm_in_channels: 2816
|
| 75 |
+
attention_resolutions:
|
| 76 |
+
- 4
|
| 77 |
+
- 2
|
| 78 |
+
channel_mult:
|
| 79 |
+
- 1
|
| 80 |
+
- 2
|
| 81 |
+
- 4
|
| 82 |
+
context_dim: 2048
|
| 83 |
+
in_channels: 4
|
| 84 |
+
legacy: false
|
| 85 |
+
model_channels: 320
|
| 86 |
+
num_classes: sequential
|
| 87 |
+
num_head_channels: 64
|
| 88 |
+
num_res_blocks: 2
|
| 89 |
+
out_channels: 4
|
| 90 |
+
spatial_transformer_attn_type: softmax-xformers
|
| 91 |
+
transformer_depth:
|
| 92 |
+
- 1
|
| 93 |
+
- 2
|
| 94 |
+
- 10
|
| 95 |
+
use_checkpoint: true
|
| 96 |
+
use_linear_in_transformer: true
|
| 97 |
+
use_spatial_transformer: true
|
| 98 |
+
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
|
| 99 |
+
scale_factor: 0.13025
|
| 100 |
+
target: sgm.models.diffusion.DiffusionEngine
|