Felldude committed on
Commit
19e153a
·
verified ·
1 Parent(s): b1a81df

Upload folder using huggingface_hub

Browse files
model_index.json CHANGED
@@ -1,29 +1,29 @@
1
- {
2
- "_class_name": "StableDiffusionXLPipeline",
3
- "_diffusers_version": "0.24.0",
4
-
5
- "tokenizer": [
6
- "stabilityai/stable-diffusion-xl-base-1.0",
7
- "tokenizer"
8
- ],
9
- "tokenizer_2": [
10
- "stabilityai/stable-diffusion-xl-base-1.0",
11
- "tokenizer_2"
12
- ],
13
- "text_encoder": [
14
- "Felldude/PONY_FP32",
15
- "text_encoder"
16
- ],
17
- "text_encoder_2": [
18
- "Felldude/PONY_FP32",
19
- "text_encoder_2"
20
- ],
21
- "unet": [
22
- "Felldude/PONY_FP32",
23
- "unet"
24
- ],
25
- "vae": [
26
- "stabilityai/stable-diffusion-xl-base-1.0",
27
- "vae"
28
- ]
29
- }
 
1
+ {
2
+ "_class_name": "StableDiffusionXLPipeline",
3
+ "_diffusers_version": "0.24.0",
4
+
5
+ "tokenizer": [
6
+ "stabilityai/stable-diffusion-xl-base-1.0",
7
+ "tokenizer"
8
+ ],
9
+ "tokenizer_2": [
10
+ "stabilityai/stable-diffusion-xl-base-1.0",
11
+ "tokenizer_2"
12
+ ],
13
+ "text_encoder": [
14
+ "Felldude/PONY_FP32",
15
+ "text_encoder"
16
+ ],
17
+ "text_encoder_2": [
18
+ "Felldude/PONY_FP32",
19
+ "text_encoder_2"
20
+ ],
21
+ "unet": [
22
+ "Felldude/PONY_FP32",
23
+ "unet"
24
+ ],
25
+ "vae": [
26
+ "stabilityai/stable-diffusion-xl-base-1.0",
27
+ "vae"
28
+ ]
29
+ }
text_encoder/config.json CHANGED
@@ -1,24 +1,24 @@
1
- {
2
- "architectures": [
3
- "CLIPTextModel"
4
- ],
5
- "attention_dropout": 0.0,
6
- "bos_token_id": 0,
7
- "dropout": 0.0,
8
- "eos_token_id": 2,
9
- "hidden_act": "quick_gelu",
10
- "hidden_size": 768,
11
- "initializer_factor": 1.0,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-05,
15
- "max_position_embeddings": 77,
16
- "model_type": "clip_text_model",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
- "pad_token_id": 1,
20
- "projection_dim": 768,
21
- "torch_dtype": "float32",
22
- "transformers_version": "4.32.0.dev0",
23
- "vocab_size": 49408
24
  }
 
1
+ {
2
+ "architectures": [
3
+ "CLIPTextModel"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 0,
7
+ "dropout": 0.0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "quick_gelu",
10
+ "hidden_size": 768,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 77,
16
+ "model_type": "clip_text_model",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "projection_dim": 768,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.32.0.dev0",
23
+ "vocab_size": 49408
24
  }
text_encoder_2/config.json CHANGED
@@ -1,24 +1,24 @@
1
- {
2
- "architectures": [
3
- "CLIPTextModelWithProjection"
4
- ],
5
- "attention_dropout": 0.0,
6
- "bos_token_id": 0,
7
- "dropout": 0.0,
8
- "eos_token_id": 2,
9
- "hidden_act": "gelu",
10
- "hidden_size": 1280,
11
- "initializer_factor": 1.0,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 5120,
14
- "layer_norm_eps": 1e-05,
15
- "max_position_embeddings": 77,
16
- "model_type": "clip_text_model",
17
- "num_attention_heads": 20,
18
- "num_hidden_layers": 32,
19
- "pad_token_id": 1,
20
- "projection_dim": 1280,
21
- "torch_dtype": "float32",
22
- "transformers_version": "4.32.0.dev0",
23
- "vocab_size": 49408
24
- }
 
1
+ {
2
+ "architectures": [
3
+ "CLIPTextModelWithProjection"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 0,
7
+ "dropout": 0.0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_size": 1280,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5120,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 77,
16
+ "model_type": "clip_text_model",
17
+ "num_attention_heads": 20,
18
+ "num_hidden_layers": 32,
19
+ "pad_token_id": 1,
20
+ "projection_dim": 1280,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.32.0.dev0",
23
+ "vocab_size": 49408
24
+ }
unet/config.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "_class_name": "UNet2DConditionModel",
3
- "_diffusers_version": "0.19.0.dev0",
4
- "act_fn": "silu",
5
- "addition_embed_type": "text_time",
6
- "addition_embed_type_num_heads": 64,
7
- "addition_time_embed_dim": 256,
8
- "attention_head_dim": [
9
- 5,
10
- 10,
11
- 20
12
- ],
13
- "block_out_channels": [
14
- 320,
15
- 640,
16
- 1280
17
- ],
18
- "center_input_sample": false,
19
- "class_embed_type": null,
20
- "class_embeddings_concat": false,
21
- "conv_in_kernel": 3,
22
- "conv_out_kernel": 3,
23
- "cross_attention_dim": 2048,
24
- "cross_attention_norm": null,
25
- "down_block_types": [
26
- "DownBlock2D",
27
- "CrossAttnDownBlock2D",
28
- "CrossAttnDownBlock2D"
29
- ],
30
- "downsample_padding": 1,
31
- "dual_cross_attention": false,
32
- "encoder_hid_dim": null,
33
- "encoder_hid_dim_type": null,
34
- "flip_sin_to_cos": true,
35
- "freq_shift": 0,
36
- "in_channels": 4,
37
- "layers_per_block": 2,
38
- "mid_block_only_cross_attention": null,
39
- "mid_block_scale_factor": 1,
40
- "mid_block_type": "UNetMidBlock2DCrossAttn",
41
- "norm_eps": 1e-05,
42
- "norm_num_groups": 32,
43
- "num_attention_heads": null,
44
- "num_class_embeds": null,
45
- "only_cross_attention": false,
46
- "out_channels": 4,
47
- "projection_class_embeddings_input_dim": 2816,
48
- "resnet_out_scale_factor": 1.0,
49
- "resnet_skip_time_act": false,
50
- "resnet_time_scale_shift": "default",
51
- "sample_size": 128,
52
- "time_cond_proj_dim": null,
53
- "time_embedding_act_fn": null,
54
- "time_embedding_dim": null,
55
- "time_embedding_type": "positional",
56
- "timestep_post_act": null,
57
- "transformer_layers_per_block": [
58
- 1,
59
- 2,
60
- 10
61
- ],
62
- "up_block_types": [
63
- "CrossAttnUpBlock2D",
64
- "CrossAttnUpBlock2D",
65
- "UpBlock2D"
66
- ],
67
- "upcast_attention": null,
68
- "use_linear_projection": true
69
- }
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.19.0.dev0",
4
+ "act_fn": "silu",
5
+ "addition_embed_type": "text_time",
6
+ "addition_embed_type_num_heads": 64,
7
+ "addition_time_embed_dim": 256,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20
12
+ ],
13
+ "block_out_channels": [
14
+ 320,
15
+ 640,
16
+ 1280
17
+ ],
18
+ "center_input_sample": false,
19
+ "class_embed_type": null,
20
+ "class_embeddings_concat": false,
21
+ "conv_in_kernel": 3,
22
+ "conv_out_kernel": 3,
23
+ "cross_attention_dim": 2048,
24
+ "cross_attention_norm": null,
25
+ "down_block_types": [
26
+ "DownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "CrossAttnDownBlock2D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dual_cross_attention": false,
32
+ "encoder_hid_dim": null,
33
+ "encoder_hid_dim_type": null,
34
+ "flip_sin_to_cos": true,
35
+ "freq_shift": 0,
36
+ "in_channels": 4,
37
+ "layers_per_block": 2,
38
+ "mid_block_only_cross_attention": null,
39
+ "mid_block_scale_factor": 1,
40
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
41
+ "norm_eps": 1e-05,
42
+ "norm_num_groups": 32,
43
+ "num_attention_heads": null,
44
+ "num_class_embeds": null,
45
+ "only_cross_attention": false,
46
+ "out_channels": 4,
47
+ "projection_class_embeddings_input_dim": 2816,
48
+ "resnet_out_scale_factor": 1.0,
49
+ "resnet_skip_time_act": false,
50
+ "resnet_time_scale_shift": "default",
51
+ "sample_size": 128,
52
+ "time_cond_proj_dim": null,
53
+ "time_embedding_act_fn": null,
54
+ "time_embedding_dim": null,
55
+ "time_embedding_type": "positional",
56
+ "timestep_post_act": null,
57
+ "transformer_layers_per_block": [
58
+ 1,
59
+ 2,
60
+ 10
61
+ ],
62
+ "up_block_types": [
63
+ "CrossAttnUpBlock2D",
64
+ "CrossAttnUpBlock2D",
65
+ "UpBlock2D"
66
+ ],
67
+ "upcast_attention": null,
68
+ "use_linear_projection": true
69
+ }