Spaces:

HorizonRobotics
/

3D-Fixer

Running on Zero

App Files Community

JasonYinnnn committed 8 days ago

Commit

454fd88

1 Parent(s): 5add261

run dpt on GPU

Browse files

Files changed (1) hide show

app.py +10 -6

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import gradio as gr
 import spaces
 import os
 import uuid
 from typing import Any, List, Optional, Union
 import cv2
@@ -184,6 +185,7 @@ def run_segmentation(
     return seg_map_pil
 def run_depth_estimation(
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
@@ -192,6 +194,10 @@ def run_depth_estimation(
     rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
     global dpt_pack
     global work_space
     if work_space is None:
@@ -210,7 +216,7 @@ def run_depth_estimation(
     W, H = rgb_image.size
     input_image = np.array(rgb_image).astype(np.float32)
-    input_image = torch.tensor(input_image / 255, dtype=torch.float32, device='cpu').permute(2, 0, 1)
     with torch.no_grad():
         output = moge_v2_dpt_model.infer(input_image)
@@ -227,7 +233,7 @@ def run_depth_estimation(
             [0, intrinsics[1, 1].item() * H, 0.5*H],
             [0, 0, 1]
         ])
-    ).to(dtype=torch.float32, device='cpu')
     dpt_pack = {
         'c2w': c2w.to('cpu'),
@@ -356,8 +362,6 @@ def run_generation(
     generated_object_map = {}
     run_id = str(uuid.uuid4())
-    # pipeline.cuda()
     if not isinstance(rgb_image, Image.Image) and "image" in rgb_image:
         rgb_image = rgb_image["image"]
@@ -844,12 +848,12 @@ if __name__ == '__main__':
     segmenter_id = "facebook/sam-vit-base"
     sam_processor = AutoProcessor.from_pretrained(segmenter_id)
     sam_segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(
-        "cpu", torch.float32
     )
     mogev2_id = 'Ruicheng/moge-2-vitl'
     moge_v2_dpt_model = MoGeModel.from_pretrained(mogev2_id).to(
-        "cpu", torch.float32
     )
     ############## 3D-Fixer model

 import spaces
 import os
+os.environ['SPCONV_ALGO'] = 'native'
 import uuid
 from typing import Any, List, Optional, Union
 import cv2
     return seg_map_pil
+@spaces.GPU
 def run_depth_estimation(
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
     rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    dtype = torch.float16 if device == 'cuda' else torch.float32
+    moge_v2_dpt_model = moge_v2_dpt_model.to(device=device, dtype=dtype)
     global dpt_pack
     global work_space
     if work_space is None:
     W, H = rgb_image.size
     input_image = np.array(rgb_image).astype(np.float32)
+    input_image = torch.tensor(input_image / 255, dtype=torch.float32, device=device).permute(2, 0, 1)
     with torch.no_grad():
         output = moge_v2_dpt_model.infer(input_image)
             [0, intrinsics[1, 1].item() * H, 0.5*H],
             [0, 0, 1]
         ])
+    ).to(dtype=torch.float32, device=device)
     dpt_pack = {
         'c2w': c2w.to('cpu'),
     generated_object_map = {}
     run_id = str(uuid.uuid4())
     if not isinstance(rgb_image, Image.Image) and "image" in rgb_image:
         rgb_image = rgb_image["image"]
     segmenter_id = "facebook/sam-vit-base"
     sam_processor = AutoProcessor.from_pretrained(segmenter_id)
     sam_segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(
+        "cpu", dtype=torch.float32
     )
     mogev2_id = 'Ruicheng/moge-2-vitl'
     moge_v2_dpt_model = MoGeModel.from_pretrained(mogev2_id).to(
+        "cpu", dtype=torch.float32
     )
     ############## 3D-Fixer model