shriarul5273 committed
Commit 9c05556 · 1 Parent(s): e1d5689

added PPD models

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
Files changed (50):
  1. .github/workflows/huggingface.yml +3 -3
  2. Depth-Anything-V2/depth_anything_v2/__pycache__/__init__.cpython-311.pyc +0 -0
  3. Depth-Anything-V2/depth_anything_v2/__pycache__/dinov2.cpython-311.pyc +0 -0
  4. Depth-Anything-V2/depth_anything_v2/__pycache__/dpt.cpython-311.pyc +0 -0
  5. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-311.pyc +0 -0
  6. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-311.pyc +0 -0
  7. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-311.pyc +0 -0
  8. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-311.pyc +0 -0
  9. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-311.pyc +0 -0
  10. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-311.pyc +0 -0
  11. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-311.pyc +0 -0
  12. Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-311.pyc +0 -0
  13. Depth-Anything-V2/depth_anything_v2/util/__pycache__/__init__.cpython-311.pyc +0 -0
  14. Depth-Anything-V2/depth_anything_v2/util/__pycache__/blocks.cpython-311.pyc +0 -0
  15. Depth-Anything-V2/depth_anything_v2/util/__pycache__/transform.cpython-311.pyc +0 -0
  16. Depth-Anything/depth_anything/__pycache__/__init__.cpython-311.pyc +0 -0
  17. Depth-Anything/depth_anything/__pycache__/blocks.cpython-311.pyc +0 -0
  18. Depth-Anything/depth_anything/__pycache__/dpt.cpython-311.pyc +0 -0
  19. Depth-Anything/depth_anything/util/__pycache__/__init__.cpython-311.pyc +0 -0
  20. Depth-Anything/depth_anything/util/__pycache__/transform.cpython-311.pyc +0 -0
  21. Depth-Anything/torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-311.pyc +0 -0
  22. Depth-Anything/torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-311.pyc +0 -0
  23. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-311.pyc +0 -0
  24. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-311.pyc +0 -0
  25. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-311.pyc +0 -0
  26. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-311.pyc +0 -0
  27. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-311.pyc +0 -0
  28. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-311.pyc +0 -0
  29. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-311.pyc +0 -0
  30. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-311.pyc +0 -0
  31. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-311.pyc +0 -0
  32. Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-311.pyc +0 -0
  33. Pixel-Perfect-Depth/.gitattributes +54 -0
  34. Pixel-Perfect-Depth/app.py +209 -0
  35. Pixel-Perfect-Depth/assets/examples/0001.jpg +3 -0
  36. Pixel-Perfect-Depth/assets/examples/0002.png +3 -0
  37. Pixel-Perfect-Depth/assets/examples/0003.JPG +3 -0
  38. Pixel-Perfect-Depth/assets/examples/0004.png +3 -0
  39. Pixel-Perfect-Depth/assets/examples/0005.jpg +3 -0
  40. Pixel-Perfect-Depth/assets/examples/0006.PNG +3 -0
  41. Pixel-Perfect-Depth/assets/examples/0007.PNG +3 -0
  42. Pixel-Perfect-Depth/assets/examples/0008.PNG +3 -0
  43. Pixel-Perfect-Depth/assets/examples/0009.PNG +3 -0
  44. Pixel-Perfect-Depth/moge/__init__.py +0 -0
  45. Pixel-Perfect-Depth/moge/model/__init__.py +18 -0
  46. Pixel-Perfect-Depth/moge/model/dinov2/__init__.py +6 -0
  47. Pixel-Perfect-Depth/moge/model/dinov2/hub/__init__.py +4 -0
  48. Pixel-Perfect-Depth/moge/model/dinov2/hub/backbones.py +156 -0
  49. Pixel-Perfect-Depth/moge/model/dinov2/hub/utils.py +39 -0
  50. Pixel-Perfect-Depth/moge/model/dinov2/layers/__init__.py +11 -0
.github/workflows/huggingface.yml CHANGED
@@ -17,9 +17,9 @@ jobs:
        env:
          HF: ${{secrets.HF_TOKEN }}
          HFUSER: ${{secrets.HFUSER }}
-       run: git remote add space https://$HFUSER:$HF@huggingface.co/spaces/$HFUSER/Depth-Anything-Compare-demo
-     - name: Push to hub
+       run: git remote add space https://$HFUSER:$HF@huggingface.co/spaces/$HFUSER/Depth-Estimation-Compare-demo
+     - name: Push to huggingface hub
        env:
          HF: ${{ secrets.HF_TOKEN}}
          HFUSER: ${{secrets.HFUSER }}
-       run: git push --force https://$HFUSER:$HF@huggingface.co/spaces/$HFUSER/Depth-Anything-Compare-demo main
+       run: git push --force https://$HFUSER:$HF@huggingface.co/spaces/$HFUSER/Depth-Estimation-Compare-demo main
Depth-Anything-V2/depth_anything_v2/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (211 Bytes)
 
Depth-Anything-V2/depth_anything_v2/__pycache__/dinov2.cpython-311.pyc DELETED
Binary file (21.8 kB)
 
Depth-Anything-V2/depth_anything_v2/__pycache__/dpt.cpython-311.pyc DELETED
Binary file (11.8 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (525 Bytes)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-311.pyc DELETED
Binary file (4.48 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-311.pyc DELETED
Binary file (15.5 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-311.pyc DELETED
Binary file (1.87 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-311.pyc DELETED
Binary file (1.63 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-311.pyc DELETED
Binary file (2.09 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-311.pyc DELETED
Binary file (4.46 kB)
 
Depth-Anything-V2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-311.pyc DELETED
Binary file (3.31 kB)
 
Depth-Anything-V2/depth_anything_v2/util/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (216 Bytes)
 
Depth-Anything-V2/depth_anything_v2/util/__pycache__/blocks.cpython-311.pyc DELETED
Binary file (6.03 kB)
 
Depth-Anything-V2/depth_anything_v2/util/__pycache__/transform.cpython-311.pyc DELETED
Binary file (7.69 kB)
 
Depth-Anything/depth_anything/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (205 Bytes)
 
Depth-Anything/depth_anything/__pycache__/blocks.cpython-311.pyc DELETED
Binary file (5.96 kB)
 
Depth-Anything/depth_anything/__pycache__/dpt.cpython-311.pyc DELETED
Binary file (10.7 kB)
 
Depth-Anything/depth_anything/util/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (210 Bytes)
 
Depth-Anything/depth_anything/util/__pycache__/transform.cpython-311.pyc DELETED
Binary file (10.7 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-311.pyc DELETED
Binary file (6.34 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-311.pyc DELETED
Binary file (21.3 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (229 Bytes)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (600 Bytes)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-311.pyc DELETED
Binary file (4.5 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-311.pyc DELETED
Binary file (15.5 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-311.pyc DELETED
Binary file (3.95 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-311.pyc DELETED
Binary file (1.89 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-311.pyc DELETED
Binary file (1.65 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-311.pyc DELETED
Binary file (2.11 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-311.pyc DELETED
Binary file (4.48 kB)
 
Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-311.pyc DELETED
Binary file (3.32 kB)
 
Pixel-Perfect-Depth/.gitattributes ADDED
@@ -0,0 +1,54 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0001.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0003.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0004.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0005.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0006.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0007.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0008.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0009.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0010.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0004.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0005.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0011.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0001.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0002.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0003.JPG filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0006.PNG filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0007.PNG filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0008.PNG filter=lfs diff=lfs merge=lfs -text
+ assets/examples/0009.PNG filter=lfs diff=lfs merge=lfs -text
Pixel-Perfect-Depth/app.py ADDED
@@ -0,0 +1,209 @@
+ import gradio as gr
+ import cv2
+ import matplotlib
+ import numpy as np
+ import os
+ import time
+ from PIL import Image
+ import torch
+ import torch.nn.functional as F
+ import open3d as o3d
+ import trimesh
+ import tempfile
+ import shutil
+ from pathlib import Path
+ from concurrent.futures import ThreadPoolExecutor
+ from gradio_imageslider import ImageSlider
+ from huggingface_hub import hf_hub_download
+
+ from ppd.utils.set_seed import set_seed
+ from ppd.utils.align_depth_func import recover_metric_depth_ransac
+ from ppd.utils.depth2pcd import depth2pcd
+ from moge.model.v2 import MoGeModel
+ from ppd.models.ppd import PixelPerfectDepth
+
+ try:
+     import spaces
+     HUGGINFACE_SPACES_INSTALLED = True
+ except ImportError:
+     HUGGINFACE_SPACES_INSTALLED = False
+
+ css = """
+ #img-display-container {
+     max-height: 100vh;
+ }
+ #img-display-input {
+     max-height: 100vh;
+ }
+ #img-display-output {
+     max-height: 100vh;
+ }
+ #download {
+     height: 62px;
+ }
+
+ #img-display-output .image-slider-image {
+     object-fit: contain !important;
+     width: 100% !important;
+     height: 100% !important;
+ }
+ """
+
+ set_seed(666)
+
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ default_steps = 20
+ model = PixelPerfectDepth(sampling_steps=default_steps)
+ ckpt_path = hf_hub_download(
+     repo_id="gangweix/Pixel-Perfect-Depth",
+     filename="ppd.pth",
+     repo_type="model"
+ )
+ state_dict = torch.load(ckpt_path, map_location="cpu")
+ model.load_state_dict(state_dict, strict=False)
+ model = model.eval()
+ model = model.to(DEVICE)
+
+ moge_model = MoGeModel.from_pretrained("Ruicheng/moge-2-vitl-normal").eval()
+ moge_model = moge_model.to(DEVICE)
+
+
+ def main(share=True):
+     print("Initializing Pixel-Perfect Depth Demo...")
+
+     cmap = matplotlib.colormaps.get_cmap('Spectral')
+
+     title = "# Pixel-Perfect Depth"
+     description = """Official demo for **Pixel-Perfect Depth**.
+ Please refer to our [paper](https://arxiv.org/pdf/2510.07316), [project page](https://pixel-perfect-depth.github.io), and [github](https://github.com/gangweix/pixel-perfect-depth) for more details."""
+
+     @(spaces.GPU if HUGGINFACE_SPACES_INSTALLED else (lambda x: x))
+     def predict_depth(image, denoise_steps):
+         depth, resize_image = model.infer_image(image, sampling_steps=denoise_steps)
+         return depth, resize_image
+
+     @(spaces.GPU if HUGGINFACE_SPACES_INSTALLED else (lambda x: x))
+     def predict_moge_depth(image):
+         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         image = torch.tensor(image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
+         metric_depth, mask, intrinsics = moge_model.infer(image)
+         metric_depth[~mask] = metric_depth[mask].max()
+         return metric_depth, mask, intrinsics
+
+     def on_submit(image, denoise_steps, apply_filter, request: gr.Request = None):
+
+         H, W = image.shape[:2]
+         ppd_depth, resize_image = predict_depth(image[:, :, ::-1], denoise_steps)
+         resize_H, resize_W = resize_image.shape[:2]
+
+         # moge provide metric depth and intrinsics
+         moge_depth, mask, intrinsics = predict_moge_depth(resize_image)
+
+         # relative depth -> metric depth
+         metric_depth = recover_metric_depth_ransac(ppd_depth, moge_depth, mask)
+         intrinsics[0, 0] *= resize_W
+         intrinsics[1, 1] *= resize_H
+         intrinsics[0, 2] *= resize_W
+         intrinsics[1, 2] *= resize_H
+
+         # metric depth -> point cloud
+         pcd = depth2pcd(metric_depth, intrinsics, color=cv2.cvtColor(resize_image, cv2.COLOR_BGR2RGB), input_mask=mask, ret_pcd=True)
+         if apply_filter:
+             cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
+             pcd = pcd.select_by_index(ind)
+
+         tempdir = Path(tempfile.gettempdir(), 'ppd')
+         tempdir.mkdir(exist_ok=True)
+         output_path = Path(tempdir, request.session_hash)
+         shutil.rmtree(output_path, ignore_errors=True)
+         output_path.mkdir(exist_ok=True, parents=True)
+
+         ply_path = os.path.join(output_path, 'pointcloud.ply')
+
+         # save pcd to temporary .ply
+         pcd.points = o3d.utility.Vector3dVector(
+             np.asarray(pcd.points) * np.array([1, -1, -1], dtype=np.float32)
+         )
+         o3d.io.write_point_cloud(ply_path, pcd)
+         vertices = np.asarray(pcd.points)
+         vertex_colors = (np.asarray(pcd.colors) * 255).astype(np.uint8)
+         mesh = trimesh.PointCloud(vertices=vertices, colors=vertex_colors)
+         glb_path = os.path.join(output_path, 'pointcloud.glb')
+         mesh.export(glb_path)
+
+
+         # save raw depth (npy)
+         depth = cv2.resize(ppd_depth, (W, H), interpolation=cv2.INTER_LINEAR)
+         raw_depth_path = os.path.join(output_path, 'raw_depth.npy')
+         np.save(raw_depth_path, depth)
+
+         depth_vis = (depth - depth.min()) / (depth.max() - depth.min() + 1e-5) * 255.0
+         depth_vis = depth_vis.astype(np.uint8)
+         colored_depth = (cmap(depth_vis)[:, :, :3] * 255).astype(np.uint8)
+
+         split_region = np.ones((image.shape[0], 50, 3), dtype=np.uint8) * 255
+         combined_result = cv2.hconcat([image[:, :, ::-1], split_region, colored_depth[:, :, ::-1]])
+
+         vis_path = os.path.join(output_path, 'image_depth_vis.png')
+         cv2.imwrite(vis_path, combined_result)
+
+         file_names = ["image_depth_vis.png", "raw_depth.npy", "pointcloud.ply"]
+
+         download_files = [
+             (output_path / name).as_posix()
+             for name in file_names
+             if (output_path / name).exists()
+         ]
+
+         return [(image, colored_depth), glb_path, download_files]
+
+
+     with gr.Blocks(theme=gr.themes.Soft()) as demo:
+         gr.Markdown(title)
+         gr.Markdown(description)
+         gr.Markdown("### Point Cloud & Depth Prediction demo")
+
+         with gr.Row():
+             # Left: input image + settings
+             with gr.Column():
+                 input_image = gr.Image(label="Input Image", image_mode="RGB", type='numpy', elem_id='img-display-input')
+                 with gr.Accordion(label="Settings", open=False):
+                     denoise_steps = gr.Slider(label="Denoising Steps", minimum=1, maximum=100, value=20, step=1)
+                     apply_filter = gr.Checkbox(label="Apply filter points", value=True)
+                 submit_btn = gr.Button(value="Predict")
+
+             # Right: 3D point cloud + depth
+             with gr.Column():
+                 with gr.Tabs():
+                     with gr.Tab("3D View"):
+                         model_3d = gr.Model3D(display_mode="solid", label="3D Point Map", clear_color=[1,1,1,1], height="60vh")
+                     with gr.Tab("Depth"):
+                         depth_map = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)
+                     with gr.Tab("Download"):
+                         download_files = gr.File(type='filepath', label="Download Files")
+
+         submit_btn.click(
+             fn=lambda: [None, None, None, "", "", ""],
+             outputs=[depth_map, model_3d, download_files]
+         ).then(
+             fn=on_submit,
+             inputs=[input_image, denoise_steps, apply_filter],
+             outputs=[depth_map, model_3d, download_files]
+         )
+
+         example_files = os.listdir('assets/examples')
+         example_files.sort()
+         example_files = [os.path.join('assets/examples', filename) for filename in example_files]
+         examples = gr.Examples(
+             examples=example_files,
+             inputs=input_image,
+             outputs=[depth_map, model_3d, download_files],
+             fn=on_submit,
+             cache_examples=False
+         )
+
+     demo.queue().launch(share=share)
+
+ if __name__ == '__main__':
+     main(share=True)
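Note on the pipeline above: app.py chains two models. PixelPerfectDepth predicts relative depth, MoGe supplies a metric anchor plus camera intrinsics, and recover_metric_depth_ransac rescales the former onto the latter before the point cloud is built. Below is a condensed, headless sketch of that same flow with the Gradio UI stripped out; it is a hedged example that assumes the ppd and moge packages added in this commit are importable and reuses only calls that app.py itself makes.

import cv2
import torch
from huggingface_hub import hf_hub_download
from ppd.models.ppd import PixelPerfectDepth
from ppd.utils.align_depth_func import recover_metric_depth_ransac
from moge.model.v2 import MoGeModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pixel-Perfect Depth: relative depth from the diffusion model
ppd = PixelPerfectDepth(sampling_steps=20)
ckpt = hf_hub_download(repo_id="gangweix/Pixel-Perfect-Depth", filename="ppd.pth", repo_type="model")
ppd.load_state_dict(torch.load(ckpt, map_location="cpu"), strict=False)
ppd = ppd.eval().to(device)

# MoGe v2: metric depth + intrinsics used as the alignment target
moge = MoGeModel.from_pretrained("Ruicheng/moge-2-vitl-normal").eval().to(device)

bgr = cv2.imread("assets/examples/0001.jpg")                     # OpenCV gives BGR, as on_submit() expects
rel_depth, resized = ppd.infer_image(bgr, sampling_steps=20)     # relative depth + resized input image
rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
rgb_t = torch.tensor(rgb / 255, dtype=torch.float32, device=device).permute(2, 0, 1)
metric, mask, intrinsics = moge.infer(rgb_t)
metric_depth = recover_metric_depth_ransac(rel_depth, metric, mask)  # relative -> metric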
Pixel-Perfect-Depth/assets/examples/0001.jpg ADDED

Git LFS Details

  • SHA256: 4205842dfc133d8e76625ded7c31d3a2a2b8f9500919a0f4ecedc32a9bac87be
  • Pointer size: 131 Bytes
  • Size of remote file: 249 kB
Pixel-Perfect-Depth/assets/examples/0002.png ADDED

Git LFS Details

  • SHA256: d97f69094e48a27cb72ff7be5f7ddcde1eb4da31aee237867cabf1ea2abd5310
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB
Pixel-Perfect-Depth/assets/examples/0003.JPG ADDED

Git LFS Details

  • SHA256: 3a194a4e6d6ca4ff07b51baba841ee775095a1d034dd67d24ed45e6da5928fb3
  • Pointer size: 132 Bytes
  • Size of remote file: 9.68 MB
Pixel-Perfect-Depth/assets/examples/0004.png ADDED

Git LFS Details

  • SHA256: 0d8cf934034b05e01d612452c5b4ae4381baf3929ae1944df09f614e5cbdb0d4
  • Pointer size: 131 Bytes
  • Size of remote file: 489 kB
Pixel-Perfect-Depth/assets/examples/0005.jpg ADDED

Git LFS Details

  • SHA256: eeebeb413ec78384dd5942992b5699ff281c6cc50a157e522a3b289e30d0b567
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB
Pixel-Perfect-Depth/assets/examples/0006.PNG ADDED

Git LFS Details

  • SHA256: b8967fdf774e36ba0c27deb104d0afd72d19accf590a5d21fb5ccc2726912eea
  • Pointer size: 132 Bytes
  • Size of remote file: 2.59 MB
Pixel-Perfect-Depth/assets/examples/0007.PNG ADDED

Git LFS Details

  • SHA256: 74c959aabb376bf2541f9bd4b88028e5ff7321a61cff03b2223c802c950537f6
  • Pointer size: 132 Bytes
  • Size of remote file: 2.6 MB
Pixel-Perfect-Depth/assets/examples/0008.PNG ADDED

Git LFS Details

  • SHA256: 668d2323220211f7c318b47668865689c4ff7ff8f9daec0dde0ca923dee47095
  • Pointer size: 132 Bytes
  • Size of remote file: 2.95 MB
Pixel-Perfect-Depth/assets/examples/0009.PNG ADDED

Git LFS Details

  • SHA256: 5489c9f5b1eb3d856ffd3225ebb8756eb935c92b2ea0f87f4bf00f4dc45c0336
  • Pointer size: 132 Bytes
  • Size of remote file: 2.38 MB
Pixel-Perfect-Depth/moge/__init__.py ADDED
Empty file (no content).
Pixel-Perfect-Depth/moge/model/__init__.py ADDED
@@ -0,0 +1,18 @@
+ import importlib
+ from typing import *
+
+ if TYPE_CHECKING:
+     from .v1 import MoGeModel as MoGeModelV1
+     from .v2 import MoGeModel as MoGeModelV2
+
+
+ def import_model_class_by_version(version: str) -> Type[Union['MoGeModelV1', 'MoGeModelV2']]:
+     assert version in ['v1', 'v2'], f'Unsupported model version: {version}'
+
+     try:
+         module = importlib.import_module(f'.{version}', __package__)
+     except ModuleNotFoundError:
+         raise ValueError(f'Model version "{version}" not found.')
+
+     cls = getattr(module, 'MoGeModel')
+     return cls
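A minimal usage sketch for the helper above (assuming the moge package from this commit is on the import path): the version string is resolved to the concrete MoGeModel class only when requested, so importing moge.model stays cheap.

from moge.model import import_model_class_by_version

# Lazily resolve the v2 implementation; a missing submodule raises ValueError, not ImportError.
MoGeModelV2 = import_model_class_by_version("v2")
moge = MoGeModelV2.from_pretrained("Ruicheng/moge-2-vitl-normal").eval()  # same checkpoint app.py loads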
Pixel-Perfect-Depth/moge/model/dinov2/__init__.py ADDED
@@ -0,0 +1,6 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ __version__ = "0.0.1"
Pixel-Perfect-Depth/moge/model/dinov2/hub/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
Pixel-Perfect-Depth/moge/model/dinov2/hub/backbones.py ADDED
@@ -0,0 +1,156 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from enum import Enum
+ from typing import Union
+
+ import torch
+
+ from .utils import _DINOV2_BASE_URL, _make_dinov2_model_name
+
+
+ class Weights(Enum):
+     LVD142M = "LVD142M"
+
+
+ def _make_dinov2_model(
+     *,
+     arch_name: str = "vit_large",
+     img_size: int = 518,
+     patch_size: int = 14,
+     init_values: float = 1.0,
+     ffn_layer: str = "mlp",
+     block_chunks: int = 0,
+     num_register_tokens: int = 0,
+     interpolate_antialias: bool = False,
+     interpolate_offset: float = 0.1,
+     pretrained: bool = True,
+     weights: Union[Weights, str] = Weights.LVD142M,
+     **kwargs,
+ ):
+     from ..models import vision_transformer as vits
+
+     if isinstance(weights, str):
+         try:
+             weights = Weights[weights]
+         except KeyError:
+             raise AssertionError(f"Unsupported weights: {weights}")
+
+     model_base_name = _make_dinov2_model_name(arch_name, patch_size)
+     vit_kwargs = dict(
+         img_size=img_size,
+         patch_size=patch_size,
+         init_values=init_values,
+         ffn_layer=ffn_layer,
+         block_chunks=block_chunks,
+         num_register_tokens=num_register_tokens,
+         interpolate_antialias=interpolate_antialias,
+         interpolate_offset=interpolate_offset,
+     )
+     vit_kwargs.update(**kwargs)
+     model = vits.__dict__[arch_name](**vit_kwargs)
+
+     if pretrained:
+         model_full_name = _make_dinov2_model_name(arch_name, patch_size, num_register_tokens)
+         url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth"
+         state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu")
+         model.load_state_dict(state_dict, strict=True)
+
+     return model
+
+
+ def dinov2_vits14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitb14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitl14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs)
+
+
+ def dinov2_vitg14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_giant2",
+         ffn_layer="swiglufused",
+         weights=weights,
+         pretrained=pretrained,
+         **kwargs,
+     )
+
+
+ def dinov2_vits14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_small",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitb14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_base",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitl14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_large",
+         pretrained=pretrained,
+         weights=weights,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
+
+
+ def dinov2_vitg14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs):
+     """
+     DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset.
+     """
+     return _make_dinov2_model(
+         arch_name="vit_giant2",
+         ffn_layer="swiglufused",
+         weights=weights,
+         pretrained=pretrained,
+         num_register_tokens=4,
+         interpolate_antialias=True,
+         interpolate_offset=0.0,
+         **kwargs,
+     )
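These constructors mirror the upstream DINOv2 torch.hub entry points: _make_dinov2_model builds the ViT from the package's vision_transformer module and, with pretrained=True, fetches the matching LVD-142M checkpoint from _DINOV2_BASE_URL. A hedged sketch of building a backbone without the download; it assumes moge.model.dinov2.models.vision_transformer ships with this commit, which the relative import implies but the truncated file list does not show.

import torch
from moge.model.dinov2.hub.backbones import dinov2_vitl14_reg

# pretrained=False skips torch.hub.load_state_dict_from_url and leaves the weights randomly
# initialised; the _reg variant adds 4 register tokens and zeroes interpolate_offset, as above.
backbone = dinov2_vitl14_reg(pretrained=False).eval()
n_params = sum(p.numel() for p in backbone.parameters())
print(f"ViT-L/14 (+4 registers): {n_params / 1e6:.0f}M parameters")  # roughly 300M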
Pixel-Perfect-Depth/moge/model/dinov2/hub/utils.py ADDED
@@ -0,0 +1,39 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import itertools
+ import math
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2"
+
+
+ def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str:
+     compact_arch_name = arch_name.replace("_", "")[:4]
+     registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else ""
+     return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}"
+
+
+ class CenterPadding(nn.Module):
+     def __init__(self, multiple):
+         super().__init__()
+         self.multiple = multiple
+
+     def _get_pad(self, size):
+         new_size = math.ceil(size / self.multiple) * self.multiple
+         pad_size = new_size - size
+         pad_size_left = pad_size // 2
+         pad_size_right = pad_size - pad_size_left
+         return pad_size_left, pad_size_right
+
+     @torch.inference_mode()
+     def forward(self, x):
+         pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1]))
+         output = F.pad(x, pads)
+         return output
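CenterPadding symmetrically pads the trailing spatial dimensions up to the next multiple of the ViT patch size, which is what lets arbitrarily sized images feed a patch-14 backbone. A quick sketch of its effect on an NCHW tensor (the slice x.shape[:1:-1] walks the dimensions last-to-first and stops before the channel axis, so only H and W are padded):

import torch
from moge.model.dinov2.hub.utils import CenterPadding

pad = CenterPadding(multiple=14)
x = torch.randn(1, 3, 500, 700)   # H=500 is not a multiple of 14; W=700 already is
y = pad(x)
print(y.shape)                    # torch.Size([1, 3, 504, 700]): 2 rows added at top and bottom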
Pixel-Perfect-Depth/moge/model/dinov2/layers/__init__.py ADDED
@@ -0,0 +1,11 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from .dino_head import DINOHead
+ from .mlp import Mlp
+ from .patch_embed import PatchEmbed
+ from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
+ from .block import NestedTensorBlock
+ from .attention import MemEffAttention
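The re-exports above give downstream code a flat import path for the DINOv2 building blocks. A small hedged sketch follows; the constructor signatures are assumed to match upstream DINOv2 (dim/num_heads for the attention block, in_features/hidden_features for the MLP), and this vendored copy is presumed unchanged.

import torch
from moge.model.dinov2.layers import MemEffAttention, Mlp

attn = MemEffAttention(dim=384, num_heads=6)       # assumed upstream signature; falls back to plain attention without xFormers
mlp = Mlp(in_features=384, hidden_features=4 * 384)
tokens = torch.randn(2, 1370, 384)                 # (batch, tokens, dim): 37*37 patches + 1 cls token at 518px
out = mlp(attn(tokens))
print(out.shape)                                   # torch.Size([2, 1370, 384])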