Upload folder using huggingface_hub

#1
by toto10 - opened
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. configs/INITIAL_MODELS.yaml +128 -0
  2. configs/models.yaml +19 -0
  3. configs/models.yaml.example +47 -0
  4. configs/stable-diffusion/v1-finetune.yaml +110 -0
  5. configs/stable-diffusion/v1-finetune_style.yaml +103 -0
  6. configs/stable-diffusion/v1-inference.yaml +79 -0
  7. configs/stable-diffusion/v1-inpainting-inference.yaml +79 -0
  8. configs/stable-diffusion/v1-m1-finetune.yaml +110 -0
  9. configs/stable-diffusion/v2-inference-v.yaml +68 -0
  10. configs/stable-diffusion/v2-inference.yaml +67 -0
  11. configs/stable-diffusion/v2-inpainting-inference-v.yaml +159 -0
  12. configs/stable-diffusion/v2-inpainting-inference.yaml +158 -0
  13. databases/invokeai.db +0 -0
  14. invokeai.yaml +41 -0
  15. models/core/convert/bert-base-uncased/special_tokens_map.json +7 -0
  16. models/core/convert/bert-base-uncased/tokenizer.json +0 -0
  17. models/core/convert/bert-base-uncased/tokenizer_config.json +13 -0
  18. models/core/convert/bert-base-uncased/vocab.txt +0 -0
  19. models/core/convert/clip-vit-large-patch14/config.json +25 -0
  20. models/core/convert/clip-vit-large-patch14/merges.txt +0 -0
  21. models/core/convert/clip-vit-large-patch14/model.safetensors +3 -0
  22. models/core/convert/clip-vit-large-patch14/special_tokens_map.json +24 -0
  23. models/core/convert/clip-vit-large-patch14/tokenizer_config.json +33 -0
  24. models/core/convert/clip-vit-large-patch14/vocab.json +0 -0
  25. models/core/convert/sd-vae-ft-mse/config.json +31 -0
  26. models/core/convert/sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
  27. models/core/convert/stable-diffusion-2-clip/text_encoder/config.json +25 -0
  28. models/core/convert/stable-diffusion-2-clip/text_encoder/model.safetensors +3 -0
  29. models/core/convert/stable-diffusion-2-clip/tokenizer/merges.txt +0 -0
  30. models/core/convert/stable-diffusion-2-clip/tokenizer/special_tokens_map.json +24 -0
  31. models/core/convert/stable-diffusion-2-clip/tokenizer/tokenizer_config.json +33 -0
  32. models/core/convert/stable-diffusion-2-clip/tokenizer/vocab.json +0 -0
  33. models/core/convert/stable-diffusion-safety-checker/config.json +168 -0
  34. models/core/convert/stable-diffusion-safety-checker/model.safetensors +3 -0
  35. models/core/convert/stable-diffusion-safety-checker/preprocessor_config.json +28 -0
  36. models/core/upscaling/realesrgan/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth +3 -0
  37. models/core/upscaling/realesrgan/RealESRGAN_x2plus.pth +3 -0
  38. models/core/upscaling/realesrgan/RealESRGAN_x4plus.pth +3 -0
  39. models/core/upscaling/realesrgan/RealESRGAN_x4plus_anime_6B.pth +3 -0
  40. models/sd-1/controlnet/canny/config.json +42 -0
  41. models/sd-1/controlnet/canny/diffusion_pytorch_model.safetensors +3 -0
  42. models/sd-1/controlnet/depth/config.json +42 -0
  43. models/sd-1/controlnet/depth/diffusion_pytorch_model.safetensors +3 -0
  44. models/sd-1/embedding/EasyNegative.safetensors +3 -0
  45. models/sd-1/lora/LowRA.safetensors +3 -0
  46. models/sd-1/main/darkSushi25D25D_v30/feature_extractor/preprocessor_config.json +28 -0
  47. models/sd-1/main/darkSushi25D25D_v30/model_index.json +33 -0
  48. models/sd-1/main/darkSushi25D25D_v30/safety_checker/config.json +168 -0
  49. models/sd-1/main/darkSushi25D25D_v30/safety_checker/model.safetensors +3 -0
  50. models/sd-1/main/darkSushi25D25D_v30/scheduler/scheduler_config.json +15 -0
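A commit like this one is typically produced with the huggingface_hub client's folder-upload API. A minimal sketch of that workflow follows; the local path and repo id are placeholders, not taken from this PR:

from huggingface_hub import HfApi

# Mirror a local InvokeAI root directory into a Hub repo; every file in the
# folder becomes one entry in the resulting commit, as in the listing above.
api = HfApi()
api.upload_folder(
    folder_path="./invokeai-root",            # placeholder local folder
    repo_id="user/my-invokeai-backup",        # placeholder target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)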
configs/INITIAL_MODELS.yaml ADDED
@@ -0,0 +1,128 @@
+ # This file predefines a few models that the user may want to install.
+ sd-1/main/stable-diffusion-v1-5:
+   description: Stable Diffusion version 1.5 diffusers model (4.27 GB)
+   repo_id: runwayml/stable-diffusion-v1-5
+   recommended: True
+   default: True
+ sd-1/main/stable-diffusion-inpainting:
+   description: RunwayML SD 1.5 model optimized for inpainting, diffusers version (4.27 GB)
+   repo_id: runwayml/stable-diffusion-inpainting
+   recommended: False
+ sd-2/main/stable-diffusion-2-1:
+   description: Stable Diffusion version 2.1 diffusers model, trained on 768 pixel images (5.21 GB)
+   repo_id: stabilityai/stable-diffusion-2-1
+   recommended: False
+ sd-2/main/stable-diffusion-2-inpainting:
+   description: Stable Diffusion version 2.0 inpainting model (5.21 GB)
+   repo_id: stabilityai/stable-diffusion-2-inpainting
+   recommended: False
+ sdxl/main/stable-diffusion-xl-base-0-9:
+   description: Stable Diffusion XL base model (12 GB; access token required)
+   repo_id: stabilityai/stable-diffusion-xl-base-0.9
+   recommended: False
+ sdxl-refiner/main/stable-diffusion-xl-refiner-0-9:
+   description: Stable Diffusion XL refiner model (12 GB; access token required)
+   repo_id: stabilityai/stable-diffusion-xl-refiner-0.9
+   recommended: False
+ sdxl/main/stable-diffusion-xl-base-1-0:
+   description: Stable Diffusion XL base model (12 GB; access token required)
+   repo_id: stabilityai/stable-diffusion-xl-base-1.0
+   recommended: False
+ sdxl-refiner/main/stable-diffusion-xl-refiner-1-0:
+   description: Stable Diffusion XL refiner model (12 GB; access token required)
+   repo_id: stabilityai/stable-diffusion-xl-refiner-1.0
+   recommended: False
+ sd-1/main/Analog-Diffusion:
+   description: An SD-1.5 model trained on diverse analog photographs (2.13 GB)
+   repo_id: wavymulder/Analog-Diffusion
+   recommended: False
+ sd-1/main/Deliberate:
+   description: Versatile model that produces detailed images up to 768px (4.27 GB)
+   repo_id: XpucT/Deliberate
+   recommended: False
+ sd-1/main/Dungeons-and-Diffusion:
+   description: Dungeons & Dragons characters (2.13 GB)
+   repo_id: 0xJustin/Dungeons-and-Diffusion
+   recommended: False
+ sd-1/main/dreamlike-photoreal-2:
+   description: A photorealistic model trained on 768 pixel images based on SD 1.5 (2.13 GB)
+   repo_id: dreamlike-art/dreamlike-photoreal-2.0
+   recommended: False
+ sd-1/main/Inkpunk-Diffusion:
+   description: Stylized illustrations inspired by Gorillaz, FLCL and Shinkawa; prompt with "nvinkpunk" (4.27 GB)
+   repo_id: Envvi/Inkpunk-Diffusion
+   recommended: False
+ sd-1/main/openjourney:
+   description: An SD 1.5 model fine tuned on Midjourney; prompt with "mdjrny-v4 style" (2.13 GB)
+   repo_id: prompthero/openjourney
+   recommended: False
+ sd-1/main/portraitplus:
+   description: An SD-1.5 model trained on close range portraits of people; prompt with "portrait+" (2.13 GB)
+   repo_id: wavymulder/portraitplus
+   recommended: False
+ sd-1/main/seek.art_MEGA:
+   repo_id: coreco/seek.art_MEGA
+   description: A general use SD-1.5 "anything" model that supports multiple styles (2.1 GB)
+   recommended: False
+ sd-1/main/trinart_stable_diffusion_v2:
+   description: An SD-1.5 model finetuned with ~40K assorted high resolution manga/anime-style images (2.13 GB)
+   repo_id: naclbit/trinart_stable_diffusion_v2
+   recommended: False
+ sd-1/main/waifu-diffusion:
+   description: An SD-1.5 model trained on 680k anime/manga-style images (2.13 GB)
+   repo_id: hakurei/waifu-diffusion
+   recommended: False
+ sd-1/controlnet/canny:
+   repo_id: lllyasviel/control_v11p_sd15_canny
+   recommended: True
+ sd-1/controlnet/inpaint:
+   repo_id: lllyasviel/control_v11p_sd15_inpaint
+   recommended: False
+ sd-1/controlnet/mlsd:
+   repo_id: lllyasviel/control_v11p_sd15_mlsd
+   recommended: False
+ sd-1/controlnet/depth:
+   repo_id: lllyasviel/control_v11f1p_sd15_depth
+   recommended: True
+ sd-1/controlnet/normal_bae:
+   repo_id: lllyasviel/control_v11p_sd15_normalbae
+   recommended: False
+ sd-1/controlnet/seg:
+   repo_id: lllyasviel/control_v11p_sd15_seg
+   recommended: False
+ sd-1/controlnet/lineart:
+   repo_id: lllyasviel/control_v11p_sd15_lineart
+   recommended: False
+ sd-1/controlnet/lineart_anime:
+   repo_id: lllyasviel/control_v11p_sd15s2_lineart_anime
+   recommended: False
+ sd-1/controlnet/openpose:
+   repo_id: lllyasviel/control_v11p_sd15_openpose
+   recommended: False
+ sd-1/controlnet/scribble:
+   repo_id: lllyasviel/control_v11p_sd15_scribble
+   recommended: False
+ sd-1/controlnet/softedge:
+   repo_id: lllyasviel/control_v11p_sd15_softedge
+   recommended: False
+ sd-1/controlnet/shuffle:
+   repo_id: lllyasviel/control_v11e_sd15_shuffle
+   recommended: False
+ sd-1/controlnet/tile:
+   repo_id: lllyasviel/control_v11f1e_sd15_tile
+   recommended: False
+ sd-1/controlnet/ip2p:
+   repo_id: lllyasviel/control_v11e_sd15_ip2p
+   recommended: False
+ sd-1/embedding/EasyNegative:
+   path: https://huggingface.co/embed/EasyNegative/resolve/main/EasyNegative.safetensors
+   recommended: True
+ sd-1/embedding/ahx-beta-453407d:
+   repo_id: sd-concepts-library/ahx-beta-453407d
+   recommended: False
+ sd-1/lora/LowRA:
+   path: https://civitai.com/api/download/models/63006
+   recommended: True
+ sd-1/lora/Ink scenery:
+   path: https://civitai.com/api/download/models/83390
+   recommended: False
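For reference, a minimal sketch of how a catalog like this can be consumed, assuming only PyYAML. Keys encode base/type/name (e.g. "sd-1/main/stable-diffusion-v1-5"), and each entry points at either a Hugging Face repo_id or a direct download path:

import yaml

# Load the catalog and list the entries flagged as recommended defaults.
with open("configs/INITIAL_MODELS.yaml") as f:
    catalog = yaml.safe_load(f)

for key, spec in catalog.items():
    base, model_type, name = key.split("/", 2)
    if spec.get("recommended"):
        source = spec.get("repo_id") or spec.get("path")
        print(f"[{base}/{model_type}] {name} <- {source}")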
configs/models.yaml ADDED
@@ -0,0 +1,19 @@
+ # This file describes the alternative machine learning models
+ # available to InvokeAI script.
+ #
+ # To add a new model, follow the examples below. Each
+ # model requires a model config file, a weights file,
+ # and the width and height of the images it
+ # was trained on.
+ __metadata__:
+   version: 3.0.0
+ sd-1/main/stable-diffusion-v1-5:
+   path: models/sd-1/main/stable-diffusion-v1-5
+   description: Stable Diffusion version 1.5 diffusers model (4.27 GB)
+   variant: normal
+   format: diffusers
+ sd-1/main/darkSushi25D25D_v30:
+   path: models/sd-1/main/darkSushi25D25D_v30
+   description: sd-1 main model darkSushi25D25D_v30
+   variant: normal
+   format: diffusers
configs/models.yaml.example ADDED
@@ -0,0 +1,47 @@
+ # This file describes the alternative machine learning models
+ # available to InvokeAI script.
+ #
+ # To add a new model, follow the examples below. Each
+ # model requires a model config file, a weights file,
+ # and the width and height of the images it
+ # was trained on.
+ diffusers-1.4:
+   description: 🤗🧨 Stable Diffusion v1.4
+   format: diffusers
+   repo_id: CompVis/stable-diffusion-v1-4
+ diffusers-1.5:
+   description: 🤗🧨 Stable Diffusion v1.5
+   format: diffusers
+   repo_id: runwayml/stable-diffusion-v1-5
+   default: true
+ diffusers-1.5+mse:
+   description: 🤗🧨 Stable Diffusion v1.5 + MSE-finetuned VAE
+   format: diffusers
+   repo_id: runwayml/stable-diffusion-v1-5
+   vae:
+     repo_id: stabilityai/sd-vae-ft-mse
+ diffusers-inpainting-1.5:
+   description: 🤗🧨 inpainting for Stable Diffusion v1.5
+   format: diffusers
+   repo_id: runwayml/stable-diffusion-inpainting
+ stable-diffusion-1.5:
+   description: The newest Stable Diffusion version 1.5 weight file (4.27 GB)
+   weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
+   config: configs/stable-diffusion/v1-inference.yaml
+   width: 512
+   height: 512
+   vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+ stable-diffusion-1.4:
+   description: Stable Diffusion inference model version 1.4
+   config: configs/stable-diffusion/v1-inference.yaml
+   weights: models/ldm/stable-diffusion-v1/sd-v1-4.ckpt
+   vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+   width: 512
+   height: 512
+ inpainting-1.5:
+   weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
+   config: configs/stable-diffusion/v1-inpainting-inference.yaml
+   vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+   description: RunwayML SD 1.5 model optimized for inpainting
+   width: 512
+   height: 512
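The example file mixes two entry styles: diffusers entries (repo_id plus an explicit format) and legacy checkpoint entries (weights/config/vae file paths). A hedged sketch of telling them apart; the function below is illustrative, not InvokeAI's actual loader:

import yaml

def entry_kind(entry: dict) -> str:
    # diffusers entries declare their format explicitly; legacy ckpt entries
    # are recognized by their on-disk weights path instead.
    if entry.get("format") == "diffusers":
        return f"diffusers repo: {entry['repo_id']}"
    return f"checkpoint: {entry['weights']} (config: {entry['config']})"

with open("configs/models.yaml.example") as f:
    for name, entry in yaml.safe_load(f).items():
        print(name, "->", entry_kind(entry))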
configs/stable-diffusion/v1-finetune.yaml ADDED
@@ -0,0 +1,110 @@
+ model:
+   base_learning_rate: 5.0e-03
+   target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+     embedding_reg_weight: 0.0
+
+     personalization_config:
+       target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
+       params:
+         placeholder_strings: ["*"]
+         initializer_words: ["sculpture"]
+         per_image_tokens: false
+         num_vectors_per_token: 1
+         progressive_words: False
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
+
+ data:
+   target: main.DataModuleFromConfig
+   params:
+     batch_size: 1
+     num_workers: 2
+     wrap: false
+     train:
+       target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
+       params:
+         size: 512
+         set: train
+         per_image_tokens: false
+         repeats: 100
+     validation:
+       target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
+       params:
+         size: 512
+         set: val
+         per_image_tokens: false
+         repeats: 10
+
+ lightning:
+   modelcheckpoint:
+     params:
+       every_n_train_steps: 500
+   callbacks:
+     image_logger:
+       target: main.ImageLogger
+       params:
+         batch_frequency: 500
+         max_images: 8
+         increase_log_steps: False
+
+   trainer:
+     benchmark: True
+     max_steps: 4000000
+     # max_steps: 4000
+
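Throughout these configs, every component is declared with the target/params convention inherited from the CompVis latent-diffusion codebase: target is a dotted import path and params the constructor keyword arguments. A minimal re-implementation of the usual helper (a sketch, not InvokeAI's own code):

import importlib

def instantiate_from_config(node: dict):
    # "target" -> class to import, "params" -> kwargs for its constructor.
    module_path, cls_name = node["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_path), cls_name)
    return cls(**node.get("params", {}))

# e.g. instantiate_from_config(cfg["model"]) would build the LatentDiffusion
# object, provided the invokeai backend modules are importable.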
configs/stable-diffusion/v1-finetune_style.yaml ADDED
@@ -0,0 +1,103 @@
+ model:
+   base_learning_rate: 5.0e-03
+   target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+     embedding_reg_weight: 0.0
+
+     personalization_config:
+       target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
+       params:
+         placeholder_strings: ["*"]
+         initializer_words: ["painting"]
+         per_image_tokens: false
+         num_vectors_per_token: 1
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
+
+ data:
+   target: main.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 16
+     wrap: false
+     train:
+       target: invokeai.backend.stable_diffusion.data.personalized_style.PersonalizedBase
+       params:
+         size: 512
+         set: train
+         per_image_tokens: false
+         repeats: 100
+     validation:
+       target: invokeai.backend.stable_diffusion.data.personalized_style.PersonalizedBase
+       params:
+         size: 512
+         set: val
+         per_image_tokens: false
+         repeats: 10
+
+ lightning:
+   callbacks:
+     image_logger:
+       target: main.ImageLogger
+       params:
+         batch_frequency: 500
+         max_images: 8
+         increase_log_steps: False
+
+   trainer:
+     benchmark: True
configs/stable-diffusion/v1-inference.yaml ADDED
@@ -0,0 +1,79 @@
+ model:
+   base_learning_rate: 1.0e-04
+   target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+
+     scheduler_config: # 10000 warmup steps
+       target: invokeai.backend.stable_diffusion.lr_scheduler.LambdaLinearScheduler
+       params:
+         warm_up_steps: [ 10000 ]
+         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+         f_start: [ 1.e-6 ]
+         f_max: [ 1. ]
+         f_min: [ 1. ]
+
+     personalization_config:
+       target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
+       params:
+         placeholder_strings: ["*"]
+         initializer_words: ['sculpture']
+         per_image_tokens: false
+         num_vectors_per_token: 1
+         progressive_words: False
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.WeightedFrozenCLIPEmbedder
configs/stable-diffusion/v1-inpainting-inference.yaml ADDED
@@ -0,0 +1,79 @@
+ model:
+   base_learning_rate: 7.5e-05
+   target: invokeai.backend.models.diffusion.ddpm.LatentInpaintDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false # Note: different from the one we trained before
+     conditioning_key: hybrid # important
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     finetune_keys: null
+
+     scheduler_config: # 10000 warmup steps
+       target: invokeai.backend.stable_diffusion.lr_scheduler.LambdaLinearScheduler
+       params:
+         warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
+         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
+         f_start: [ 1.e-6 ]
+         f_max: [ 1. ]
+         f_min: [ 1. ]
+
+     personalization_config:
+       target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
+       params:
+         placeholder_strings: ["*"]
+         initializer_words: ['sculpture']
+         per_image_tokens: false
+         num_vectors_per_token: 8
+         progressive_words: False
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 9 # 4 data + 4 downscaled image + 1 mask
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.WeightedFrozenCLIPEmbedder
configs/stable-diffusion/v1-m1-finetune.yaml ADDED
@@ -0,0 +1,110 @@
+ model:
+   base_learning_rate: 5.0e-03
+   target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true # Note: different from the one we trained before
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False
+     embedding_reg_weight: 0.0
+
+     personalization_config:
+       target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
+       params:
+         placeholder_strings: ["*"]
+         initializer_words: ['sculpture']
+         per_image_tokens: false
+         num_vectors_per_token: 6
+         progressive_words: False
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_heads: 8
+         use_spatial_transformer: True
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: True
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
+
+ data:
+   target: main.DataModuleFromConfig
+   params:
+     batch_size: 1
+     num_workers: 2
+     wrap: false
+     train:
+       target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
+       params:
+         size: 512
+         set: train
+         per_image_tokens: false
+         repeats: 100
+     validation:
+       target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
+       params:
+         size: 512
+         set: val
+         per_image_tokens: false
+         repeats: 10
+
+ lightning:
+   modelcheckpoint:
+     params:
+       every_n_train_steps: 500
+   callbacks:
+     image_logger:
+       target: main.ImageLogger
+       params:
+         batch_frequency: 500
+         max_images: 5
+         increase_log_steps: False
+
+   trainer:
+     benchmark: False
+     max_steps: 6200
+     # max_steps: 4000
+
configs/stable-diffusion/v2-inference-v.yaml ADDED
@@ -0,0 +1,68 @@
+ model:
+   base_learning_rate: 1.0e-4
+   target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
+   params:
+     parameterization: "v"
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False # we set this to false because this is an inference only config
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         use_checkpoint: True
+         use_fp16: True
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_head_channels: 64 # need to fix for flash-attn
+         use_spatial_transformer: True
+         use_linear_in_transformer: True
+         transformer_depth: 1
+         context_dim: 1024
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           #attn_type: "vanilla-xformers"
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+       params:
+         freeze: True
+         layer: "penultimate"
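The one line that distinguishes this file from the plain v2 config below is parameterization: "v": the UNet is trained to predict v rather than the noise epsilon. Following Salimans & Ho (2022), the target is v = sqrt(alpha_bar_t) * eps - sqrt(1 - alpha_bar_t) * x0; a minimal sketch:

import torch

def v_target(x0: torch.Tensor, eps: torch.Tensor, alpha_bar_t: torch.Tensor) -> torch.Tensor:
    # v-prediction target for a noisy sample built from clean x0 and noise eps.
    return alpha_bar_t.sqrt() * eps - (1.0 - alpha_bar_t).sqrt() * x0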
configs/stable-diffusion/v2-inference.yaml ADDED
@@ -0,0 +1,67 @@
+ model:
+   base_learning_rate: 1.0e-4
+   target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: False # we set this to false because this is an inference only config
+
+     unet_config:
+       target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
+       params:
+         use_checkpoint: True
+         use_fp16: True
+         image_size: 32 # unused
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_head_channels: 64 # need to fix for flash-attn
+         use_spatial_transformer: True
+         use_linear_in_transformer: True
+         transformer_depth: 1
+         context_dim: 1024
+         legacy: False
+
+     first_stage_config:
+       target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           #attn_type: "vanilla-xformers"
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: invokeai.backend.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
+       params:
+         freeze: True
+         layer: "penultimate"
configs/stable-diffusion/v2-inpainting-inference-v.yaml ADDED
@@ -0,0 +1,159 @@
+ model:
+   base_learning_rate: 5.0e-05
+   target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     parameterization: "v"
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false
+     conditioning_key: hybrid
+     scale_factor: 0.18215
+     monitor: val/loss_simple_ema
+     finetune_keys: null
+     use_ema: False
+
+     unet_config:
+       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         use_checkpoint: True
+         image_size: 32 # unused
+         in_channels: 9
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_head_channels: 64 # need to fix for flash-attn
+         use_spatial_transformer: True
+         use_linear_in_transformer: True
+         transformer_depth: 1
+         context_dim: 1024
+         legacy: False
+
+     first_stage_config:
+       target: ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           #attn_type: "vanilla-xformers"
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: [ ]
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+       params:
+         freeze: True
+         layer: "penultimate"
+
+
+ data:
+   target: ldm.data.laion.WebDataModuleFromConfig
+   params:
+     tar_base: null # for concat as in LAION-A
+     p_unsafe_threshold: 0.1
+     filter_word_list: "data/filters.yaml"
+     max_pwatermark: 0.45
+     batch_size: 8
+     num_workers: 6
+     multinode: True
+     min_size: 512
+     train:
+       shards:
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
+       shuffle: 10000
+       image_key: jpg
+       image_transforms:
+       - target: torchvision.transforms.Resize
+         params:
+           size: 512
+           interpolation: 3
+       - target: torchvision.transforms.RandomCrop
+         params:
+           size: 512
+       postprocess:
+         target: ldm.data.laion.AddMask
+         params:
+           mode: "512train-large"
+           p_drop: 0.25
+     # NOTE use enough shards to avoid empty validation loops in workers
+     validation:
+       shards:
+       - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
+       shuffle: 0
+       image_key: jpg
+       image_transforms:
+       - target: torchvision.transforms.Resize
+         params:
+           size: 512
+           interpolation: 3
+       - target: torchvision.transforms.CenterCrop
+         params:
+           size: 512
+       postprocess:
+         target: ldm.data.laion.AddMask
+         params:
+           mode: "512train-large"
+           p_drop: 0.25
+
+ lightning:
+   find_unused_parameters: True
+   modelcheckpoint:
+     params:
+       every_n_train_steps: 5000
+
+   callbacks:
+     metrics_over_trainsteps_checkpoint:
+       params:
+         every_n_train_steps: 10000
+
+     image_logger:
+       target: main.ImageLogger
+       params:
+         enable_autocast: False
+         disabled: False
+         batch_frequency: 1000
+         max_images: 4
+         increase_log_steps: False
+         log_first_step: False
+         log_images_kwargs:
+           use_ema_scope: False
+           inpaint: False
+           plot_progressive_rows: False
+           plot_diffusion_rows: False
+           N: 4
+           unconditional_guidance_scale: 5.0
+           unconditional_guidance_label: [""]
+           ddim_steps: 50 # todo check these out for depth2img,
+           ddim_eta: 0.0 # todo check these out for depth2img,
+
+   trainer:
+     benchmark: True
+     val_check_interval: 5000000
+     num_sanity_val_steps: 0
+     accumulate_grad_batches: 1
configs/stable-diffusion/v2-inpainting-inference.yaml ADDED
@@ -0,0 +1,158 @@
+ model:
+   base_learning_rate: 5.0e-05
+   target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.0120
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: "jpg"
+     cond_stage_key: "txt"
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: false
+     conditioning_key: hybrid
+     scale_factor: 0.18215
+     monitor: val/loss_simple_ema
+     finetune_keys: null
+     use_ema: False
+
+     unet_config:
+       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         use_checkpoint: True
+         image_size: 32 # unused
+         in_channels: 9
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions: [ 4, 2, 1 ]
+         num_res_blocks: 2
+         channel_mult: [ 1, 2, 4, 4 ]
+         num_head_channels: 64 # need to fix for flash-attn
+         use_spatial_transformer: True
+         use_linear_in_transformer: True
+         transformer_depth: 1
+         context_dim: 1024
+         legacy: False
+
+     first_stage_config:
+       target: ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           #attn_type: "vanilla-xformers"
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: [ ]
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+
+     cond_stage_config:
+       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+       params:
+         freeze: True
+         layer: "penultimate"
+
+
+ data:
+   target: ldm.data.laion.WebDataModuleFromConfig
+   params:
+     tar_base: null # for concat as in LAION-A
+     p_unsafe_threshold: 0.1
+     filter_word_list: "data/filters.yaml"
+     max_pwatermark: 0.45
+     batch_size: 8
+     num_workers: 6
+     multinode: True
+     min_size: 512
+     train:
+       shards:
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
+       - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
+       shuffle: 10000
+       image_key: jpg
+       image_transforms:
+       - target: torchvision.transforms.Resize
+         params:
+           size: 512
+           interpolation: 3
+       - target: torchvision.transforms.RandomCrop
+         params:
+           size: 512
+       postprocess:
+         target: ldm.data.laion.AddMask
+         params:
+           mode: "512train-large"
+           p_drop: 0.25
+     # NOTE use enough shards to avoid empty validation loops in workers
+     validation:
+       shards:
+       - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
+       shuffle: 0
+       image_key: jpg
+       image_transforms:
+       - target: torchvision.transforms.Resize
+         params:
+           size: 512
+           interpolation: 3
+       - target: torchvision.transforms.CenterCrop
+         params:
+           size: 512
+       postprocess:
+         target: ldm.data.laion.AddMask
+         params:
+           mode: "512train-large"
+           p_drop: 0.25
+
+ lightning:
+   find_unused_parameters: True
+   modelcheckpoint:
+     params:
+       every_n_train_steps: 5000
+
+   callbacks:
+     metrics_over_trainsteps_checkpoint:
+       params:
+         every_n_train_steps: 10000
+
+     image_logger:
+       target: main.ImageLogger
+       params:
+         enable_autocast: False
+         disabled: False
+         batch_frequency: 1000
+         max_images: 4
+         increase_log_steps: False
+         log_first_step: False
+         log_images_kwargs:
+           use_ema_scope: False
+           inpaint: False
+           plot_progressive_rows: False
+           plot_diffusion_rows: False
+           N: 4
+           unconditional_guidance_scale: 5.0
+           unconditional_guidance_label: [""]
+           ddim_steps: 50 # todo check these out for depth2img,
+           ddim_eta: 0.0 # todo check these out for depth2img,
+
+   trainer:
+     benchmark: True
+     val_check_interval: 5000000
+     num_sanity_val_steps: 0
+     accumulate_grad_batches: 1
databases/invokeai.db ADDED
Binary file (127 kB).
 
invokeai.yaml ADDED
@@ -0,0 +1,41 @@
+ InvokeAI:
+   Web Server:
+     host: 127.0.0.1
+     port: 9090
+     allow_origins: []
+     allow_credentials: true
+     allow_methods:
+     - '*'
+     allow_headers:
+     - '*'
+   Features:
+     esrgan: true
+     internet_available: true
+     log_tokenization: false
+     nsfw_checker: true
+     patchmatch: true
+   Memory/Performance:
+     always_use_cpu: false
+     free_gpu_mem: false
+     max_cache_size: 6.0
+     max_vram_cache_size: 2.75
+     precision: float32
+     sequential_guidance: false
+     xformers_enabled: true
+     tiled_decode: false
+   Paths:
+     autoimport_dir: autoimport
+     lora_dir: null
+     embedding_dir: null
+     controlnet_dir: null
+     conf_path: configs/models.yaml
+     models_dir: models
+     legacy_conf_dir: configs/stable-diffusion
+     db_dir: databases
+     outdir: outputs
+     use_memory_db: false
+   Logging:
+     log_handlers:
+     - console
+     log_format: color
+     log_level: info
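A minimal sketch of reading these runtime settings with PyYAML; the nested section names ("Web Server", "Memory/Performance", ...) are verbatim from the file above:

import yaml

with open("invokeai.yaml") as f:
    conf = yaml.safe_load(f)["InvokeAI"]

host = conf["Web Server"]["host"]                          # 127.0.0.1
port = conf["Web Server"]["port"]                          # 9090
xformers = conf["Memory/Performance"]["xformers_enabled"]  # true
print(f"serving on {host}:{port}, xformers={xformers}")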
models/core/convert/bert-base-uncased/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/core/convert/bert-base-uncased/tokenizer.json ADDED
The diff for this file is too large to render.
 
models/core/convert/bert-base-uncased/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/core/convert/bert-base-uncased/vocab.txt ADDED
The diff for this file is too large to render.
 
models/core/convert/clip-vit-large-patch14/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "openai/clip-vit-large-patch14",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "vocab_size": 49408
+ }
models/core/convert/clip-vit-large-patch14/merges.txt ADDED
The diff for this file is too large to render.
 
models/core/convert/clip-vit-large-patch14/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
+ size 492265168
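Entries like the one above are Git LFS pointer files rather than the weights themselves: three key/value lines giving the spec version, the blob's sha256 oid, and its byte size. A sketch that parses a pointer and verifies a downloaded blob against it:

import hashlib

def parse_lfs_pointer(path: str) -> dict:
    # Each line is "<key> <value>", e.g. "oid sha256:778d02eb...".
    with open(path) as f:
        return dict(line.split(" ", 1) for line in f.read().splitlines())

def matches_pointer(blob_path: str, pointer: dict) -> bool:
    with open(blob_path, "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()
    return pointer["oid"] == f"sha256:{digest}"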
models/core/convert/clip-vit-large-patch14/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/core/convert/clip-vit-large-patch14/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/core/convert/clip-vit-large-patch14/vocab.json ADDED
The diff for this file is too large to render.
 
models/core/convert/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.18.2",
+   "_name_or_path": "stabilityai/sd-vae-ft-mse",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 256,
+   "scaling_factor": 0.18215,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }
models/core/convert/sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2aa1f43011b553a4cba7f37456465cdbd48aab7b54b9348b890e8058ea7683ec
+ size 334643268
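The config above is a standard diffusers AutoencoderKL; a sketch of loading the local copy (a mirror of stabilityai/sd-vae-ft-mse):

from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("models/core/convert/sd-vae-ft-mse")
print(vae.config.scaling_factor)  # 0.18215, matching the config.json above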
models/core/convert/stable-diffusion-2-clip/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "stabilityai/stable-diffusion-2",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_size": 1024,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 23,
+   "pad_token_id": 1,
+   "projection_dim": 512,
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "vocab_size": 49408
+ }
models/core/convert/stable-diffusion-2-clip/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67e013543d4fac905c882e2993d86a2d454ee69dc9e8f37c0c23d33a48959d15
+ size 1361596304
models/core/convert/stable-diffusion-2-clip/tokenizer/merges.txt ADDED
The diff for this file is too large to render.
 
models/core/convert/stable-diffusion-2-clip/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "!",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/core/convert/stable-diffusion-2-clip/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/core/convert/stable-diffusion-2-clip/tokenizer/vocab.json ADDED
The diff for this file is too large to render.
 
models/core/convert/stable-diffusion-safety-checker/config.json ADDED
@@ -0,0 +1,168 @@
+ {
+   "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
+   "_name_or_path": "CompVis/stable-diffusion-safety-checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 49406,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 49407,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 768,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 77,
+     "min_length": 0,
+     "model_type": "clip_text_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 12,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.31.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "vocab_size": 49408
+   },
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "vision_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 1024,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "image_size": 224,
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 4096,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "clip_vision_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 16,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_channels": 3,
+     "num_hidden_layers": 24,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "patch_size": 14,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.31.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false
+   }
+ }
models/core/convert/stable-diffusion-safety-checker/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
+ size 1215979664
models/core/convert/stable-diffusion-safety-checker/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPFeatureExtractor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
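A hedged sketch of loading the safety checker that the files above describe; the class and processor names below are the standard diffusers/transformers ones for this checkpoint:

from diffusers.pipelines.stable_diffusion.safety_checker import (
    StableDiffusionSafetyChecker,
)
from transformers import CLIPFeatureExtractor

root = "models/core/convert/stable-diffusion-safety-checker"
checker = StableDiffusionSafetyChecker.from_pretrained(root)  # the classifier
extractor = CLIPFeatureExtractor.from_pretrained(root)        # its preprocessor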
models/core/upscaling/realesrgan/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff704c30ff560305e48ed1f4db895e525ab6bc81a46fafe80c0094a271c806d9
+ size 66922040
models/core/upscaling/realesrgan/RealESRGAN_x2plus.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49fafd45f8fd7aa8d31ab2a22d14d91b536c34494a5cfe31eb5d89c2fa266abb
+ size 67061725
models/core/upscaling/realesrgan/RealESRGAN_x4plus.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fa0d38905f75ac06eb49a7951b426670021be3018265fd191d2125df9d682f1
+ size 67040989
models/core/upscaling/realesrgan/RealESRGAN_x4plus_anime_6B.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f872d837d3c90ed2e05227bed711af5671a6fd1c9f7d7e91c911a61f155e99da
+ size 17938799
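The four .pth entries above are ESRGAN/Real-ESRGAN upscaler checkpoints. A dependency-light sanity check after download is to load the state dict and peek at its keys; note that Real-ESRGAN releases usually nest the weights under "params_ema" or "params", which is an assumption worth verifying per file:

import torch

state = torch.load(
    "models/core/upscaling/realesrgan/RealESRGAN_x4plus.pth",
    map_location="cpu",
)
# Unwrap the usual Real-ESRGAN nesting if present, else use the dict as-is.
weights = state.get("params_ema", state.get("params", state))
print(len(weights), "tensors; first key:", next(iter(weights)))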
models/sd-1/controlnet/canny/config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "_class_name": "ControlNetModel",
+   "_diffusers_version": "0.16.0.dev0",
+   "_name_or_path": "/home/patrick/controlnet_v1_1/control_v11p_sd15_canny",
+   "act_fn": "silu",
+   "attention_head_dim": 8,
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "class_embed_type": null,
+   "conditioning_embedding_out_channels": [
+     16,
+     32,
+     96,
+     256
+   ],
+   "controlnet_conditioning_channel_order": "rgb",
+   "cross_attention_dim": 768,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_scale_factor": 1,
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "projection_class_embeddings_input_dim": null,
+   "resnet_time_scale_shift": "default",
+   "upcast_attention": false,
+   "use_linear_projection": false
+ }
models/sd-1/controlnet/canny/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be713fb941fc7c625f0c7d816b6a19115783a665f3049a8974f127e0c075d9a9
+ size 1445157124
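A minimal sketch of wiring this ControlNet into a pipeline with diffusers; local paths mirror this upload's layout and assume pulled LFS blobs:

```python
# Minimal sketch: attach the canny ControlNet to the SD 1.5 checkpoint
# uploaded below (darkSushi25D25D_v30).
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

controlnet = ControlNetModel.from_pretrained(
    "models/sd-1/controlnet/canny", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
# pipe(prompt, image=canny_edge_map) then consumes a Canny edge image.
```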
models/sd-1/controlnet/depth/config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "_class_name": "ControlNetModel",
+   "_diffusers_version": "0.16.0.dev0",
+   "_name_or_path": "./",
+   "act_fn": "silu",
+   "attention_head_dim": 8,
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "class_embed_type": null,
+   "conditioning_embedding_out_channels": [
+     16,
+     32,
+     96,
+     256
+   ],
+   "controlnet_conditioning_channel_order": "rgb",
+   "cross_attention_dim": 768,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_scale_factor": 1,
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "projection_class_embeddings_input_dim": null,
+   "resnet_time_scale_shift": "default",
+   "upcast_attention": false,
+   "use_linear_projection": false
+ }
models/sd-1/controlnet/depth/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:999aca923ca5e19e70e6afc8d11073cc3c03553ca935b636bd5925df4a1c77d1
+ size 1445157124
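The depth config is architecturally identical to the canny one (only `_name_or_path` differs). diffusers also accepts a list of ControlNets, so both can condition one pipeline; a sketch with illustrative scale values:

```python
# Sketch: multi-ControlNet conditioning; the scales are illustrative.
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

controlnets = [
    ControlNetModel.from_pretrained("models/sd-1/controlnet/canny"),
    ControlNetModel.from_pretrained("models/sd-1/controlnet/depth"),
]
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30", controlnet=controlnets
)
# Call with one conditioning image per ControlNet:
# pipe(prompt, image=[edge_map, depth_map],
#      controlnet_conditioning_scale=[0.8, 0.5])
```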
models/sd-1/embedding/EasyNegative.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c74b4e810b030f6b75fde959e2db678c268d07115b85356d3c0138ba5eb42340
+ size 24655
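EasyNegative is a textual-inversion embedding, conventionally used in the negative prompt. A minimal loading sketch against the main checkpoint in this upload:

```python
# Minimal sketch: register the embedding and use its token negatively.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30"
)
pipe.load_textual_inversion(
    "models/sd-1/embedding/EasyNegative.safetensors", token="EasyNegative"
)
image = pipe("portrait photo", negative_prompt="EasyNegative").images[0]
```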
models/sd-1/lora/LowRA.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:348071db544b7242c5edcb3306160d83bcde66395153c1daf38a575c5cefd66e
+ size 75610142
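LowRA is a LoRA in safetensors format. A sketch using the folder-plus-weight_name form of load_lora_weights for broad diffusers compatibility; the strength value is illustrative:

```python
# Sketch: apply the LoRA on top of the base checkpoint.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30"
)
pipe.load_lora_weights("models/sd-1/lora", weight_name="LowRA.safetensors")
# LoRA strength via cross-attention scale; 0.6 is an illustrative value.
image = pipe(
    "dim candlelit interior", cross_attention_kwargs={"scale": 0.6}
).images[0]
```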
models/sd-1/main/darkSushi25D25D_v30/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPFeatureExtractor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
models/sd-1/main/darkSushi25D25D_v30/model_index.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.18.2",
+   "feature_extractor": [
+     "transformers",
+     "CLIPFeatureExtractor"
+   ],
+   "requires_safety_checker": true,
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
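model_index.json is the manifest `from_pretrained` reads: each entry names the library and class to instantiate from the matching subfolder. Loading the whole pipeline is then one call; a sketch, with fp16 and cuda as assumptions:

```python
# Minimal sketch: the manifest above drives which classes get loaded.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30", torch_dtype=torch.float16
).to("cuda")
image = pipe("a bowl of ramen, detailed illustration").images[0]
```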
models/sd-1/main/darkSushi25D25D_v30/safety_checker/config.json ADDED
@@ -0,0 +1,168 @@
+ {
+   "_commit_hash": null,
+   "_name_or_path": "/content/db/models/core/convert/stable-diffusion-safety-checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 49406,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 49407,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 768,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 77,
+     "min_length": 0,
+     "model_type": "clip_text_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 12,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.31.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "vocab_size": 49408
+   },
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "vision_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 1024,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "image_size": 224,
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 4096,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "clip_vision_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 16,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_channels": 3,
+     "num_hidden_layers": 24,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "patch_size": 14,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.31.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false
+   }
+ }
models/sd-1/main/darkSushi25D25D_v30/safety_checker/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
+ size 1215979664
models/sd-1/main/darkSushi25D25D_v30/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "_class_name": "PNDMScheduler",
+   "_diffusers_version": "0.18.2",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "set_alpha_to_one": false,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "trained_betas": null
+ }
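The scheduler config is interchangeable: other schedulers can be rebuilt from the same beta schedule and prediction type. A common swap, sketched:

```python
# Sketch: swap PNDM for DPM-Solver++ while reusing the config above.
from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "models/sd-1/main/darkSushi25D25D_v30"
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
```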