KBlueLeaf committed on
Commit
869a265
·
verified ·
1 Parent(s): 5a70cc6

Upload folder using huggingface_hub

Browse files
pretrain_imgnet_B_qknorm.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 16,
7
+ "hidden_size": 1024,
8
+ "intermediate_size": 4096,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": true
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "imagenet",
31
+ "source_ckpt": "ttvidt-dit-pretrain/rbzlbc57/checkpoints/epoch=19-step=100000.ckpt"
32
+ }
pretrain_imgnet_B_qknorm.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23738d11fa3b93c3ff46b478ec7472ad3e8e2a517b6d7c5a6825448a3f04927d
3
+ size 1548691368
pretrain_imgnet_B_qknorm_nofinal.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 16,
7
+ "hidden_size": 1024,
8
+ "intermediate_size": 4096,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": false
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "imagenet",
31
+ "source_ckpt": "ttvidt-dit-pretrain/dkskempu/checkpoints/epoch=19-step=100000.ckpt"
32
+ }
pretrain_imgnet_B_qknorm_nofinal.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8912e682705591d7516a0ce8b2ee77d5e86c40edc1606566690c1a0f06b36256
3
+ size 1548687192
pretrain_imgnet_L_1152d28l.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 28,
7
+ "hidden_size": 1152,
8
+ "intermediate_size": 3456,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion"
11
+ },
12
+ "encoder_hidden_size": 768,
13
+ "latent_h": 32,
14
+ "latent_w": 32,
15
+ "latent_mean": [
16
+ -0.69,
17
+ -0.48,
18
+ -0.6,
19
+ 0.28
20
+ ],
21
+ "latent_std": [
22
+ 12.38,
23
+ 11.22,
24
+ 7.93,
25
+ 21.22
26
+ ],
27
+ "pretrain_mode": "imagenet",
28
+ "source_ckpt": "ttvidt-dit-pretrain/ibh0mukg/checkpoints/epoch=19-step=100000.ckpt"
29
+ }
pretrain_imgnet_L_1152d28l.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb10311cb4bfb7a165bf732de3d58f3493d87ac0980ccaa7e6d5d69a6c166d5
3
+ size 2985899600
pretrain_imgnet_L_qknorm.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 28,
7
+ "hidden_size": 1152,
8
+ "intermediate_size": 3456,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": true
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "imagenet",
31
+ "source_ckpt": "ttvidt-dit-pretrain/d14nc69r/checkpoints/epoch=19-step=100000.ckpt"
32
+ }
pretrain_imgnet_L_qknorm.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b6630f9bd67bf1d3ea05ef732619b76ee57f1ff27c6c12d6dde2b140535151
3
+ size 2979568560
pretrain_imgnet_S_qknorm.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 12,
7
+ "hidden_size": 768,
8
+ "intermediate_size": 3072,
9
+ "num_heads": 12,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": true
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "imagenet",
31
+ "source_ckpt": "ttvidt-dit-pretrain/tsiyjsvh/checkpoints/epoch=19-step=100000.ckpt"
32
+ }
pretrain_imgnet_S_qknorm.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d3b381d18602df0b0d97ea8557211de79c79e50d1e339c90f3bc483134f66f
3
+ size 654121648
pretrain_imgnet_S_qknorm_nofinal.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 12,
7
+ "hidden_size": 768,
8
+ "intermediate_size": 3072,
9
+ "num_heads": 12,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": false
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "imagenet",
31
+ "source_ckpt": "ttvidt-dit-pretrain/gwv1kxjd/checkpoints/epoch=19-step=100000.ckpt"
32
+ }
pretrain_imgnet_S_qknorm_nofinal.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f57d0647744439b4c48219ac433eb21ffcff4ef46d6b07d6a859be813c0f6a5c
3
+ size 654118496
pretrain_video_B_1024d16l.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 16,
7
+ "hidden_size": 1024,
8
+ "intermediate_size": 4096,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion"
11
+ },
12
+ "encoder_hidden_size": 768,
13
+ "latent_h": 32,
14
+ "latent_w": 32,
15
+ "latent_mean": [
16
+ -0.69,
17
+ -0.48,
18
+ -0.6,
19
+ 0.28
20
+ ],
21
+ "latent_std": [
22
+ 12.38,
23
+ 11.22,
24
+ 7.93,
25
+ 21.22
26
+ ],
27
+ "pretrain_mode": "video",
28
+ "source_ckpt": "ttvidt-dit-pretrain/7isfknx9/checkpoints/epoch=14-step=100000.ckpt"
29
+ }
pretrain_video_B_1024d16l.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47fe8dd97160a15d65bc8a0f295590b448c6895e21ef37b9e42ab84a879f31fb
3
+ size 1553397056
pretrain_video_L_1152d28l_80k.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 28,
7
+ "hidden_size": 1152,
8
+ "intermediate_size": 3456,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion"
11
+ },
12
+ "encoder_hidden_size": 768,
13
+ "latent_h": 32,
14
+ "latent_w": 32,
15
+ "latent_mean": [
16
+ -0.69,
17
+ -0.48,
18
+ -0.6,
19
+ 0.28
20
+ ],
21
+ "latent_std": [
22
+ 12.38,
23
+ 11.22,
24
+ 7.93,
25
+ 21.22
26
+ ],
27
+ "pretrain_mode": "video",
28
+ "source_ckpt": "ttvidt-dit-pretrain/2v3xqhf2/checkpoints/epoch=11-step=80000.ckpt"
29
+ }
pretrain_video_L_1152d28l_80k.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:258ef38f5f41c9a1d073f1934aa58582f12b824b13cc91303e1fee7787784e28
3
+ size 2985899600
pretrain_video_L_qknorm.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 28,
7
+ "hidden_size": 1152,
8
+ "intermediate_size": 3456,
9
+ "num_heads": 16,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": true
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "video",
31
+ "source_ckpt": "ttvidt-dit-pretrain/j7hkyq3z/checkpoints/epoch=14-step=100000.ckpt"
32
+ }
pretrain_video_L_qknorm.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e3b7b1d430c64c767f9a50bd1b1dd351d0e9597533f8f7adc39392bd75ddf1
3
+ size 2979568560
pretrain_video_S_qknorm.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 12,
7
+ "hidden_size": 768,
8
+ "intermediate_size": 3072,
9
+ "num_heads": 12,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": true
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "video",
31
+ "source_ckpt": "ttvidt-dit-pretrain/baec0kfz/checkpoints/epoch=14-step=100000.ckpt"
32
+ }
pretrain_video_S_qknorm.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79067d2ecc5355f44bcaa6fb88baf383db787bf770ffdef77bb7875fda100a28
3
+ size 654121648
pretrain_video_S_qknorm_nofinal.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_config": {
3
+ "image_dim": 4,
4
+ "patch_size": 2,
5
+ "decoder_style": "dit",
6
+ "num_layers": 12,
7
+ "hidden_size": 768,
8
+ "intermediate_size": 3072,
9
+ "num_heads": 12,
10
+ "decode_mode": "diffusion",
11
+ "qk_norm": true,
12
+ "attn_bias": false,
13
+ "use_final_norm": false
14
+ },
15
+ "encoder_hidden_size": 768,
16
+ "latent_h": 32,
17
+ "latent_w": 32,
18
+ "latent_mean": [
19
+ -0.69,
20
+ -0.48,
21
+ -0.6,
22
+ 0.28
23
+ ],
24
+ "latent_std": [
25
+ 12.38,
26
+ 11.22,
27
+ 7.93,
28
+ 21.22
29
+ ],
30
+ "pretrain_mode": "video",
31
+ "source_ckpt": "ttvidt-dit-pretrain/1c4ts6z2/checkpoints/epoch=14-step=100000.ckpt"
32
+ }
pretrain_video_S_qknorm_nofinal.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e63d0b9b2485e3857647e4f0ba81283609a507f7a2d69ff7b7ac583b27c24f6b
3
+ size 654118496