Stable-X committed on
Commit
539a2d0
·
verified ·
1 Parent(s): 6cc725a

Upload 17 files

Browse files
README.md CHANGED
@@ -1,3 +1,16 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: reconviagen
3
+ pipeline_tag: mvimages-to-3d
4
+ license: mit
5
+ language:
6
+ - en
7
+ ---
8
+ # trellis vggt v0-1
9
+
10
+ <!-- Provide a quick summary of what the model is/does. -->
11
+
12
+ ReconViaGen was introduced in the paper [ReconViaGen: Towards Accurate Multi-view 3D Object Reconstruction via Generation](https://jiahao620.github.io/reconviagen/).
13
+
14
+ Project page: https://jiahao620.github.io/reconviagen/
15
+
16
+ Code: https://github.com/GAP-LAB-CUHK-SZ/ReconViaGen
ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatGaussianDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "lr": {
15
+ "_xyz": 1.0,
16
+ "_features_dc": 1.0,
17
+ "_opacity": 1.0,
18
+ "_scaling": 1.0,
19
+ "_rotation": 0.1
20
+ },
21
+ "perturb_offset": true,
22
+ "voxel_size": 1.5,
23
+ "num_gaussians": 32,
24
+ "2d_filter_kernel_size": 0.1,
25
+ "3d_filter_kernel_size": 9e-4,
26
+ "scaling_bias": 4e-3,
27
+ "opacity_bias": 0.1,
28
+ "scaling_activation": "softplus"
29
+ }
30
+ }
31
+ }
ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38c84bcef5ce0af1f48b1b5558dabc7575a13346043c41a7e0610f1fa619a161
3
+ size 171450952
ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatMeshDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "use_color": true
15
+ }
16
+ }
17
+ }
ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e87aba94b5786407eb06d0502c1ed0885a0027a3f2b8537bfe15b0a92c01859
3
+ size 181903412
ckpts/slat_flow_img_dit_L_64l8p2_fp16.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatFlowModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 2,
13
+ "num_io_res_blocks": 2,
14
+ "io_block_channels": [128],
15
+ "pe_mode": "ape",
16
+ "qk_rms_norm": true
17
+ }
18
+ }
ckpts/slat_flow_img_dit_L_64l8p2_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9907b5e50f46e55601ce72419650f8ace13cdfd8acb8b209c7b2622fe4bfd2c6
3
+ size 1203755136
ckpts/slat_vggt_cond.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ModulatedSLATMultiViewCond",
3
+ "args": {
4
+ "channels": 1024,
5
+ "ctx_channels": 3072,
6
+ "use_fp16": true
7
+ }
8
+ }
ckpts/slat_vggt_cond.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18945872ec1a5fc13b2bfe86f279777c895c015ddff5cabc8f5a6dcd245d862a
3
+ size 100697560
ckpts/ss_dec_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureDecoder",
4
+ "args": {
5
+ "out_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [512, 128, 32],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_dec_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
3
+ size 147591972
ckpts/ss_flow_img_dit_L_16l8_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlowModel",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 1,
13
+ "pe_mode": "ape",
14
+ "qk_rms_norm": true,
15
+ "use_fp16": true
16
+ }
17
+ }
ckpts/ss_flow_img_dit_L_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876df6751febc412d3e6f62c686f59da1634e4d3c4edba44fdf6a749bdbba73a
3
+ size 1130770840
ckpts/ss_vggt_cond.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ModulatedMultiViewCond",
3
+ "args": {
4
+ "channels": 1024,
5
+ "ctx_channels": 3072,
6
+ "num_heads": 16,
7
+ "mlp_ratio": 4,
8
+ "attn_mode": "full",
9
+ "use_checkpoint": false,
10
+ "use_rope": false,
11
+ "share_mod": false,
12
+ "qk_rms_norm": true,
13
+ "qk_rms_norm_cross": false
14
+ }
15
+ }
ckpts/ss_vggt_cond.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fad4d17c8d6914c2320230ae24971ed6c6569a678e7d5c260f802e66895704
3
+ size 352607808
pipeline.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "TrellisVGGTTo3DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "ckpts/ss_flow_img_dit_L_16l8_fp16",
7
+ "slat_decoder_gs": "ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16",
8
+ "slat_decoder_mesh": "ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16",
9
+ "slat_flow_model": "ckpts/slat_flow_img_dit_L_64l8p2_fp16",
10
+ "sparse_structure_vggt_cond": "ckpts/ss_vggt_cond",
11
+ "slat_vggt_cond": "ckpts/slat_vggt_cond"
12
+ },
13
+ "sparse_structure_sampler": {
14
+ "name": "FlowEulerGuidanceIntervalSampler",
15
+ "args": {
16
+ "sigma_min": 1e-5
17
+ },
18
+ "params": {
19
+ "steps": 25,
20
+ "cfg_strength": 5.0,
21
+ "cfg_interval": [0.5, 1.0],
22
+ "rescale_t": 3.0
23
+ }
24
+ },
25
+ "slat_sampler": {
26
+ "name": "FlowEulerGuidanceIntervalSampler",
27
+ "args": {
28
+ "sigma_min": 1e-5
29
+ },
30
+ "params": {
31
+ "steps": 25,
32
+ "cfg_strength": 5.0,
33
+ "cfg_interval": [0.5, 1.0],
34
+ "rescale_t": 3.0
35
+ }
36
+ },
37
+ "slat_normalization": {
38
+ "mean": [
39
+ -2.1687545776367188,
40
+ -0.004347046371549368,
41
+ -0.13352349400520325,
42
+ -0.08418072760105133,
43
+ -0.5271206498146057,
44
+ 0.7238689064979553,
45
+ -1.1414450407028198,
46
+ 1.2039363384246826
47
+ ],
48
+ "std": [
49
+ 2.377650737762451,
50
+ 2.386378288269043,
51
+ 2.124418020248413,
52
+ 2.1748552322387695,
53
+ 2.663944721221924,
54
+ 2.371192216873169,
55
+ 2.6217446327209473,
56
+ 2.684523105621338
57
+ ]
58
+ },
59
+ "image_cond_model": "dinov2_vitl14_reg"
60
+ }
61
+ }