hbb1 committed on
Commit
bc38d94
·
verified ·
1 Parent(s): 40ef5ad

Upload folder using huggingface_hub

Browse files
ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatGaussianDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "lr": {
15
+ "_xyz": 1.0,
16
+ "_features_dc": 1.0,
17
+ "_opacity": 1.0,
18
+ "_scaling": 1.0,
19
+ "_rotation": 0.1
20
+ },
21
+ "perturb_offset": true,
22
+ "voxel_size": 1.5,
23
+ "num_gaussians": 32,
24
+ "2d_filter_kernel_size": 0.1,
25
+ "3d_filter_kernel_size": 9e-4,
26
+ "scaling_bias": 4e-3,
27
+ "opacity_bias": 0.1,
28
+ "scaling_activation": "softplus"
29
+ }
30
+ }
31
+ }
ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38c84bcef5ce0af1f48b1b5558dabc7575a13346043c41a7e0610f1fa619a161
3
+ size 171450952
ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatMeshDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "use_color": true
15
+ }
16
+ }
17
+ }
ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e87aba94b5786407eb06d0502c1ed0885a0027a3f2b8537bfe15b0a92c01859
3
+ size 181903412
ckpts/slat_dec_rf_swin8_B_64l8r16_fp16.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatRadianceFieldDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "rank": 16,
15
+ "dim": 8
16
+ }
17
+ }
18
+ }
ckpts/slat_dec_rf_swin8_B_64l8r16_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737da6578d01948016b7c39786113af0d64a46f7922f6b8b5e698b84643be514
3
+ size 171450488
ckpts/slat_enc_swin8_B_64l8_fp16.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatEncoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 1024,
6
+ "model_channels": 768,
7
+ "latent_channels": 8,
8
+ "num_blocks": 12,
9
+ "num_heads": 12,
10
+ "mlp_ratio": 4,
11
+ "attn_mode": "swin",
12
+ "window_size": 8,
13
+ "use_fp16": true
14
+ }
15
+ }
ckpts/slat_enc_swin8_B_64l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21dceac6bee917ab6458ff52c9757ba89a779d03031c7bd17f9e7f0103bfd436
3
+ size 173242816
ckpts/slat_flow_img_dit_L_64l8p2_fp16.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ElasticVisualLatentConditioningSLatFlowModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "cond_latent_channels": 8,
10
+ "visual_feat_channels": [
11
+ 4,
12
+ 4,
13
+ 4,
14
+ 4
15
+ ],
16
+ "visual_feat_resolution": 256,
17
+ "visual_conv_kernel_size": 3,
18
+ "num_blocks": 24,
19
+ "num_heads": 16,
20
+ "mlp_ratio": 4,
21
+ "patch_size": 2,
22
+ "num_io_res_blocks": 2,
23
+ "io_block_channels": [
24
+ 128
25
+ ],
26
+ "pe_mode": "ape",
27
+ "qk_rms_norm": true,
28
+ "use_fp16": true
29
+ }
30
+ }
ckpts/slat_flow_img_dit_L_64l8p2_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7240e382aeaa2e1e82aafd8da57996b5493c569c01f737a00c98a6e11bf28571
3
+ size 2401799952
ckpts/ss_dec_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureDecoder",
4
+ "args": {
5
+ "out_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [512, 128, 32],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_dec_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
3
+ size 147591972
ckpts/ss_enc_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureEncoder",
4
+ "args": {
5
+ "in_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [32, 128, 512],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_enc_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107874eeaa0feb82f51b19db5da7db534fb7e7f19e5a122b9ff1bc2e258bfc6d
3
+ size 119068016
ckpts/ss_flow_img_dit_L_16l8_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlowModel",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 1,
13
+ "pe_mode": "ape",
14
+ "qk_rms_norm": true,
15
+ "use_fp16": true
16
+ }
17
+ }
ckpts/ss_flow_img_dit_L_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96dc6bfd4136fd950af564dd16b4ae533c9ba6af8f26c670646b2a9f2789b1db
3
+ size 1130770840
ckpts/suv_flow_img_dit_L_16l8_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlowModel",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 16,
6
+ "out_channels": 16,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 1,
13
+ "pe_mode": "ape",
14
+ "qk_rms_norm": true,
15
+ "use_fp16": true
16
+ }
17
+ }
ckpts/suv_flow_img_dit_L_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d0b50ffbb70295b5f6528180d0671d0d71bcf55a19e8c51e0fd19229aab459
3
+ size 2239067672
ckpts/uv_vae_conv3d_16l8_fp16_decoder.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureDecoder",
3
+ "args": {
4
+ "out_channels": 3,
5
+ "latent_channels": 8,
6
+ "num_res_blocks": 2,
7
+ "num_res_blocks_middle": 2,
8
+ "channels": [
9
+ 512,
10
+ 128,
11
+ 32
12
+ ],
13
+ "use_fp16": true
14
+ }
15
+ }
ckpts/uv_vae_conv3d_16l8_fp16_decoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec4dd80a64e82806ce0633a772f49eb2cb0ab7f3e8c87d7c2643b9b49a334d6
3
+ size 294698604
ckpts/uv_vae_conv3d_16l8_fp16_encoder.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureEncoder",
3
+ "args": {
4
+ "in_channels": 3,
5
+ "latent_channels": 8,
6
+ "num_res_blocks": 2,
7
+ "num_res_blocks_middle": 2,
8
+ "channels": [
9
+ 32,
10
+ 128,
11
+ 512
12
+ ],
13
+ "use_fp16": true
14
+ }
15
+ }
ckpts/uv_vae_conv3d_16l8_fp16_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24d59f1e93451e19fb9a9118311452793cb7e5dec68c5fe69c7ec8ed926d41d
3
+ size 237206376
pipeline.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Cupid3DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "ckpts/suv_flow_img_dit_L_16l8_fp16",
7
+ "slat_decoder_gs": "ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16",
8
+ "slat_decoder_rf": "ckpts/slat_dec_rf_swin8_B_64l8r16_fp16",
9
+ "slat_decoder_mesh": "ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16",
10
+ "slat_flow_model": "ckpts/slat_flow_img_dit_L_64l8p2_fp16",
11
+ "sparse_structure_uv_decoder": "ckpts/uv_vae_conv3d_16l8_fp16_decoder",
12
+ "sparse_structure_uv_encoder": "ckpts/uv_vae_conv3d_16l8_fp16_encoder",
13
+ "sparse_structure_encoder": "ckpts/ss_enc_conv3d_16l8_fp16"
14
+ },
15
+ "sparse_structure_sampler": {
16
+ "name": "FlowEulerGuidanceIntervalSampler",
17
+ "args": {
18
+ "sigma_min": 1e-05
19
+ },
20
+ "params": {
21
+ "steps": 25,
22
+ "cfg_strength": 5.0,
23
+ "cfg_interval": [
24
+ 0.5,
25
+ 1.0
26
+ ],
27
+ "rescale_t": 3.0
28
+ }
29
+ },
30
+ "slat_sampler": {
31
+ "name": "FlowEulerGuidanceIntervalSampler",
32
+ "args": {
33
+ "sigma_min": 1e-05
34
+ },
35
+ "params": {
36
+ "steps": 25,
37
+ "cfg_strength": 5.0,
38
+ "cfg_interval": [
39
+ 0.5,
40
+ 1.0
41
+ ],
42
+ "rescale_t": 3.0
43
+ }
44
+ },
45
+ "slat_normalization": {
46
+ "mean": [
47
+ -2.1687545776367188,
48
+ -0.004347046371549368,
49
+ -0.13352349400520325,
50
+ -0.08418072760105133,
51
+ -0.5271206498146057,
52
+ 0.7238689064979553,
53
+ -1.1414450407028198,
54
+ 1.2039363384246826
55
+ ],
56
+ "std": [
57
+ 2.377650737762451,
58
+ 2.386378288269043,
59
+ 2.124418020248413,
60
+ 2.1748552322387695,
61
+ 2.663944721221924,
62
+ 2.371192216873169,
63
+ 2.6217446327209473,
64
+ 2.684523105621338
65
+ ]
66
+ },
67
+ "image_cond_model": "dinov2_vitl14_reg"
68
+ }
69
+ }