sm0ky committed on
Commit
24f9a4e
·
verified ·
1 Parent(s): 681141d

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Amodal3R_ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatGaussianDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "lr": {
15
+ "_xyz": 1.0,
16
+ "_features_dc": 1.0,
17
+ "_opacity": 1.0,
18
+ "_scaling": 1.0,
19
+ "_rotation": 0.1
20
+ },
21
+ "perturb_offset": true,
22
+ "voxel_size": 1.5,
23
+ "num_gaussians": 32,
24
+ "2d_filter_kernel_size": 0.1,
25
+ "3d_filter_kernel_size": 9e-4,
26
+ "scaling_bias": 4e-3,
27
+ "opacity_bias": 0.1,
28
+ "scaling_activation": "softplus"
29
+ }
30
+ }
31
+ }
Amodal3R_ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38c84bcef5ce0af1f48b1b5558dabc7575a13346043c41a7e0610f1fa619a161
3
+ size 171450952
Amodal3R_ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatMeshDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "latent_channels": 8,
7
+ "num_blocks": 12,
8
+ "num_heads": 12,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "use_color": true
15
+ }
16
+ }
17
+ }
Amodal3R_ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e87aba94b5786407eb06d0502c1ed0885a0027a3f2b8537bfe15b0a92c01859
3
+ size 181903412
Amodal3R_ckpts/slat_flow_img_dit_L_64l8p2_fp16_doubleattn_weighted.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatFlowModelMaskAsCondWeighted",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 2,
13
+ "num_io_res_blocks": 2,
14
+ "io_block_channels": [128],
15
+ "pe_mode": "ape",
16
+ "qk_rms_norm": true,
17
+ "use_fp16": true,
18
+ "mask_cond_type": "mask_patcher"
19
+ }
20
+ }
Amodal3R_ckpts/slat_flow_img_dit_L_64l8p2_fp16_doubleattn_weighted.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d218d4b147828563cd72f6dfc8fbf3489ad4a399aa4ea3ec060686924c0f3b
3
+ size 2804847144
Amodal3R_ckpts/ss_dec_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureDecoder",
4
+ "args": {
5
+ "out_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [512, 128, 32],
10
+ "use_fp16": true
11
+ }
12
+ }
Amodal3R_ckpts/ss_dec_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
3
+ size 147591972
Amodal3R_ckpts/ss_flow_img_dit_L_16l8_fp16_doubleattn_weighted.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlowModelMaskAsCondWeighted",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 1,
13
+ "pe_mode": "ape",
14
+ "qk_rms_norm": true,
15
+ "use_fp16": true,
16
+ "mask_cond_type": "mask_patcher"
17
+ }
18
+ }
Amodal3R_ckpts/ss_flow_img_dit_L_16l8_fp16_doubleattn_weighted.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d513798910c91ff0ee68f43404b9efc4887794702d45bf7b348a7bafcc9e29d
3
+ size 2642064600
README.md CHANGED
@@ -1,3 +1,13 @@
1
  ---
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-4.0
3
  ---
4
+
5
+ # **Amodal3R**: Amodal 3D Reconstruction from Occluded 2D Images
6
+
7
+ Given partially visible objects within images, Amodal3R reconstructs semantically meaningful 3D assets with reasonable geometry and plausible appearance.
8
+
9
+ arXiv:
10
+
11
+ Project Page: https://sm0kywu.github.io/Amodal3R/
12
+
13
+ Our model is built upon the foundation model [TRELLIS](https://trellis3d.github.io/). The pre-trained model weights are fetched from https://huggingface.co/JeffreyXiang/TRELLIS-image-large. Many thanks to the TRELLIS authors for their impressive work!
pipeline.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Amodal3RImageTo3DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "Amodal3R_ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "Amodal3R_ckpts/ss_flow_img_dit_L_16l8_fp16_doubleattn_weighted",
7
+ "slat_decoder_gs": "Amodal3R_ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16",
8
+ "slat_decoder_mesh": "Amodal3R_ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16",
9
+ "slat_flow_model": "Amodal3R_ckpts/slat_flow_img_dit_L_64l8p2_fp16_doubleattn_weighted"
10
+ },
11
+ "sparse_structure_sampler": {
12
+ "name": "FlowEulerGuidanceIntervalSampler",
13
+ "args": {
14
+ "sigma_min": 1e-5
15
+ },
16
+ "params": {
17
+ "steps": 12,
18
+ "cfg_strength": 7.5,
19
+ "cfg_interval": [0.5, 1.0],
20
+ "rescale_t": 3.0
21
+ }
22
+ },
23
+ "slat_sampler": {
24
+ "name": "FlowEulerGuidanceIntervalSampler",
25
+ "args": {
26
+ "sigma_min": 1e-5
27
+ },
28
+ "params": {
29
+ "steps": 12,
30
+ "cfg_strength": 3.0,
31
+ "cfg_interval": [0.5, 1.0],
32
+ "rescale_t": 3.0
33
+ }
34
+ },
35
+ "slat_normalization": {
36
+ "mean": [
37
+ -2.1687545776367188,
38
+ -0.004347046371549368,
39
+ -0.13352349400520325,
40
+ -0.08418072760105133,
41
+ -0.5271206498146057,
42
+ 0.7238689064979553,
43
+ -1.1414450407028198,
44
+ 1.2039363384246826
45
+ ],
46
+ "std": [
47
+ 2.377650737762451,
48
+ 2.386378288269043,
49
+ 2.124418020248413,
50
+ 2.1748552322387695,
51
+ 2.663944721221924,
52
+ 2.371192216873169,
53
+ 2.6217446327209473,
54
+ 2.684523105621338
55
+ ]
56
+ },
57
+ "image_cond_model": "dinov2_vitl14_reg"
58
+ }
59
+ }