Spaces:
Running on Zero
Running on Zero
Commit ·
6f72c86
1
Parent(s): ab34f95
run dpt on CPU
Browse files
- app.py +9 -37
- requirements.txt +0 -5
app.py
CHANGED
|
@@ -17,6 +17,7 @@ import trimesh
|
|
| 17 |
import random
|
| 18 |
import imageio
|
| 19 |
from einops import repeat
|
|
|
|
| 20 |
from threeDFixer.pipelines import ThreeDFixerPipeline
|
| 21 |
from threeDFixer.datasets.utils import (
|
| 22 |
edge_mask_morph_gradient,
|
|
@@ -51,36 +52,6 @@ work_space = None
|
|
| 51 |
dpt_pack = None
|
| 52 |
generated_object_map = {}
|
| 53 |
|
| 54 |
-
# Prepare models
|
| 55 |
-
## Grounding SAM
|
| 56 |
-
segmenter_id = "facebook/sam-vit-base"
|
| 57 |
-
sam_processor = AutoProcessor.from_pretrained(segmenter_id)
|
| 58 |
-
sam_segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(
|
| 59 |
-
DEVICE, DTYPE
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
# ############## 3D-Fixer model
|
| 63 |
-
# model_dir = 'HorizonRobotics/3D-Fixer'
|
| 64 |
-
# pipeline = ThreeDFixerPipeline.from_pretrained(
|
| 65 |
-
# model_dir, compile=False
|
| 66 |
-
# )
|
| 67 |
-
# pipeline.to(device=DEVICE)
|
| 68 |
-
# ############## 3D-Fixer model
|
| 69 |
-
|
| 70 |
-
# rot = np.array([
|
| 71 |
-
# [-1.0, 0.0, 0.0, 0.0],
|
| 72 |
-
# [ 0.0, 0.0, 1.0, 0.0],
|
| 73 |
-
# [ 0.0, 1.0, 0.0, 0.0],
|
| 74 |
-
# [ 0.0, 0.0, 0.0, 1.0],
|
| 75 |
-
# ], dtype=np.float32)
|
| 76 |
-
|
| 77 |
-
# c2w = torch.tensor([
|
| 78 |
-
# [1.0, 0.0, 0.0, 0.0],
|
| 79 |
-
# [0.0, 0.0, -1.0, 0.0],
|
| 80 |
-
# [0.0, 1.0, 0.0, 0.0],
|
| 81 |
-
# [0.0, 0.0, 0.0, 1.0],
|
| 82 |
-
# ], dtype=torch.float32, device=DEVICE)
|
| 83 |
-
|
| 84 |
save_projected_colored_pcd = lambda pts, pts_color, fpath: trimesh.PointCloud(pts.reshape(-1, 3), pts_color.reshape(-1, 3)).export(fpath)
|
| 85 |
|
| 86 |
EXAMPLES = [
|
|
@@ -213,7 +184,6 @@ def run_segmentation(
|
|
| 213 |
|
| 214 |
return seg_map_pil
|
| 215 |
|
| 216 |
-
@spaces.GPU
|
| 217 |
def run_depth_estimation(
|
| 218 |
image_prompts: Any,
|
| 219 |
seg_image: Union[str, Image.Image],
|
|
@@ -223,9 +193,6 @@ def run_depth_estimation(
|
|
| 223 |
|
| 224 |
rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
|
| 225 |
|
| 226 |
-
# global pipeline
|
| 227 |
-
# pipeline.cuda()
|
| 228 |
-
|
| 229 |
global dpt_pack
|
| 230 |
global work_space
|
| 231 |
if work_space is None:
|
|
@@ -247,7 +214,7 @@ def run_depth_estimation(
|
|
| 247 |
input_image = torch.tensor(input_image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
|
| 248 |
|
| 249 |
with torch.no_grad():
|
| 250 |
-
output =
|
| 251 |
depth = output['depth']
|
| 252 |
intrinsics = output['intrinsics']
|
| 253 |
|
|
@@ -877,14 +844,19 @@ if __name__ == '__main__':
|
|
| 877 |
dpt_pack = None
|
| 878 |
generated_object_map = {}
|
| 879 |
|
| 880 |
-
DEVICE = "
|
| 881 |
|
| 882 |
# Prepare models
|
| 883 |
## Grounding SAM
|
| 884 |
segmenter_id = "facebook/sam-vit-base"
|
| 885 |
sam_processor = AutoProcessor.from_pretrained(segmenter_id)
|
| 886 |
sam_segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(
|
| 887 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
)
|
| 889 |
|
| 890 |
############## 3D-Fixer model
|
|
|
|
| 17 |
import random
|
| 18 |
import imageio
|
| 19 |
from einops import repeat
|
| 20 |
+
from threeDFixer.moge.model.v2 import MoGeModel
|
| 21 |
from threeDFixer.pipelines import ThreeDFixerPipeline
|
| 22 |
from threeDFixer.datasets.utils import (
|
| 23 |
edge_mask_morph_gradient,
|
|
|
|
| 52 |
dpt_pack = None
|
| 53 |
generated_object_map = {}
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
save_projected_colored_pcd = lambda pts, pts_color, fpath: trimesh.PointCloud(pts.reshape(-1, 3), pts_color.reshape(-1, 3)).export(fpath)
|
| 56 |
|
| 57 |
EXAMPLES = [
|
|
|
|
| 184 |
|
| 185 |
return seg_map_pil
|
| 186 |
|
|
|
|
| 187 |
def run_depth_estimation(
|
| 188 |
image_prompts: Any,
|
| 189 |
seg_image: Union[str, Image.Image],
|
|
|
|
| 193 |
|
| 194 |
rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
|
| 195 |
|
|
|
|
|
|
|
|
|
|
| 196 |
global dpt_pack
|
| 197 |
global work_space
|
| 198 |
if work_space is None:
|
|
|
|
| 214 |
input_image = torch.tensor(input_image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
|
| 215 |
|
| 216 |
with torch.no_grad():
|
| 217 |
+
output = moge_v2_dpt_model.infer(input_image)
|
| 218 |
depth = output['depth']
|
| 219 |
intrinsics = output['intrinsics']
|
| 220 |
|
|
|
|
| 844 |
dpt_pack = None
|
| 845 |
generated_object_map = {}
|
| 846 |
|
| 847 |
+
DEVICE = "cuda"
|
| 848 |
|
| 849 |
# Prepare models
|
| 850 |
## Grounding SAM
|
| 851 |
segmenter_id = "facebook/sam-vit-base"
|
| 852 |
sam_processor = AutoProcessor.from_pretrained(segmenter_id)
|
| 853 |
sam_segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(
|
| 854 |
+
"cpu", torch.float32
|
| 855 |
+
)
|
| 856 |
+
|
| 857 |
+
mogev2_id = 'Ruicheng/moge-2-vitl'
|
| 858 |
+
moge_v2_dpt_model = MoGeModel.from_pretrained(mogev2_id).to(
|
| 859 |
+
"cpu", torch.float32
|
| 860 |
)
|
| 861 |
|
| 862 |
############## 3D-Fixer model
|
requirements.txt
CHANGED
|
@@ -37,11 +37,6 @@ huggingface_hub==0.36.0
|
|
| 37 |
git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
|
| 38 |
git+https://github.com/facebookresearch/sam2.git
|
| 39 |
pydantic==2.10.6
|
| 40 |
-
# gradio==4.38.1
|
| 41 |
-
# gradio_client==1.1.0
|
| 42 |
-
# fastapi==0.111.1
|
| 43 |
-
# httpx==0.27.0
|
| 44 |
kaolin==0.18.0
|
| 45 |
flash-attn==2.8.3+pt2.8.0cu129
|
| 46 |
nvdiffrast==0.4.0+253ac4fpt2.8.0cu129
|
| 47 |
-
xformers
|
|
|
|
| 37 |
git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
|
| 38 |
git+https://github.com/facebookresearch/sam2.git
|
| 39 |
pydantic==2.10.6
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
kaolin==0.18.0
|
| 41 |
flash-attn==2.8.3+pt2.8.0cu129
|
| 42 |
nvdiffrast==0.4.0+253ac4fpt2.8.0cu129
|
|
|