lizb6626 committed on
Commit
24e0558
·
verified ·
1 Parent(s): 271e47c

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,3 +1,14 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: ss4d
3
+ pipeline_tag: image-to-3d
4
+ license: mit
5
+ language:
6
+ - en
7
+ ---
8
+ # SS4D
9
+
10
+ Official model weights for *SS4D: Native 4D Generative Model via Structured Spacetime Latents*
11
+
12
+ Project page: https://lizb6626.github.io/SS4D/
13
+
14
+ Code: https://github.com/Lizb6626/SS4D/
ckpts/slat_dec_gs_4d.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatGaussianDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "pe_mode": "rope_t",
13
+ "use_fp16": true,
14
+ "use_checkpoint": true,
15
+ "qk_rms_norm": false,
16
+ "expand_self_attn": true,
17
+ "representation_config": {
18
+ "lr": {
19
+ "_xyz": 1.0,
20
+ "_features_dc": 1.0,
21
+ "_opacity": 1.0,
22
+ "_scaling": 1.0,
23
+ "_rotation": 0.1
24
+ },
25
+ "perturb_offset": true,
26
+ "voxel_size": 1.5,
27
+ "num_gaussians": 32,
28
+ "2d_filter_kernel_size": 0.1,
29
+ "3d_filter_kernel_size": 0.0009,
30
+ "scaling_bias": 0.004,
31
+ "opacity_bias": 0.1,
32
+ "scaling_activation": "softplus"
33
+ }
34
+ }
35
+ }
ckpts/slat_dec_gs_4d.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9033301cc6a56a0369e3009deb5a9fdd0528cc8f6a82e6a27bc87048a94da1e0
3
+ size 341486240
ckpts/slat_enc_4d.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatEncoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 1024,
6
+ "model_channels": 768,
7
+ "latent_channels": 8,
8
+ "num_blocks": 12,
9
+ "num_heads": 12,
10
+ "mlp_ratio": 4,
11
+ "attn_mode": "swin",
12
+ "window_size": 8,
13
+ "pe_mode": "rope_t",
14
+ "use_fp16": true,
15
+ "use_checkpoint": true,
16
+ "qk_rms_norm": false,
17
+ "expand_self_attn": true
18
+ }
19
+ }
ckpts/slat_enc_4d.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b800687c94726142a71f3f491b7fa00d3bc2adebd5accef563ee086d2bb6c55e
3
+ size 343278088
ckpts/slat_flow_video_4d.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatFlow4DModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 2,
13
+ "num_io_res_blocks": 2,
14
+ "num_io_res4d_blocks": 2,
15
+ "io_block_channels": [
16
+ 128
17
+ ],
18
+ "io_block_channels_t": [
19
+ 1024
20
+ ],
21
+ "pe_mode": "rope_t",
22
+ "qk_rms_norm": true,
23
+ "use_fp16": true,
24
+ "expand_self_attn": true,
25
+ "use_checkpoint": true,
26
+ "conv_type": "3D_1D"
27
+ }
28
+ }
ckpts/slat_flow_video_4d.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af933cecb213c8a2a1a7a6dd3f2b14fcbdea101b764fef6f878acef16ce2528b
3
+ size 2578072200
ckpts/ss_dec_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureDecoder",
4
+ "args": {
5
+ "out_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [512, 128, 32],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_dec_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
3
+ size 147591972
ckpts/ss_enc_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureEncoder",
4
+ "args": {
5
+ "in_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [32, 128, 512],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_enc_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107874eeaa0feb82f51b19db5da7db534fb7e7f19e5a122b9ff1bc2e258bfc6d
3
+ size 119068016
ckpts/ss_flow_video_4d.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlow4DModel",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 1,
13
+ "pe_mode": "rope_t",
14
+ "qk_rms_norm": true,
15
+ "use_fp16": true,
16
+ "expand_self_attn": true,
17
+ "use_checkpoint": true,
18
+ "window_size": 8
19
+ }
20
+ }
ckpts/ss_flow_video_4d.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e65734996e541660da54c5f15c0cf674cec738edeb35d6c7f2a508869f25ca06
3
+ size 2239002104
pipeline.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "TrellisVideoTo4DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "ckpts/ss_flow_video_4d",
7
+ "slat_decoder_gs": "ckpts/slat_dec_gs_4d",
8
+ "slat_flow_model": "ckpts/slat_flow_video_4d"
9
+ },
10
+ "sparse_structure_sampler": {
11
+ "name": "FlowEulerGuidanceIntervalSampler",
12
+ "args": {
13
+ "sigma_min": 1e-5
14
+ },
15
+ "params": {
16
+ "steps": 25,
17
+ "cfg_strength": 5.0,
18
+ "cfg_interval": [0.5, 1.0],
19
+ "rescale_t": 3.0
20
+ }
21
+ },
22
+ "slat_sampler": {
23
+ "name": "FlowEulerGuidanceIntervalSampler",
24
+ "args": {
25
+ "sigma_min": 1e-5
26
+ },
27
+ "params": {
28
+ "steps": 25,
29
+ "cfg_strength": 5.0,
30
+ "cfg_interval": [0.5, 1.0],
31
+ "rescale_t": 3.0
32
+ }
33
+ },
34
+ "slat_normalization": {
35
+ "mean": [
36
+ -2.826356,
37
+ 0.33626583,
38
+ 0.0763019,
39
+ -0.117598,
40
+ -0.4892621,
41
+ 1.0101169,
42
+ -1.0808244,
43
+ 1.4578744
44
+ ],
45
+ "std": [
46
+ 2.8175173,
47
+ 2.6447911,
48
+ 2.5076609,
49
+ 2.3946748,
50
+ 3.438241,
51
+ 2.864445,
52
+ 3.2401907,
53
+ 3.1412923
54
+ ]
55
+ },
56
+ "image_cond_model": "dinov2_vitl14_reg"
57
+ }
58
+ }