Spaces:

HorizonRobotics
/

3D-Fixer

Running on Zero

App Files Files Community

JasonYinnnn committed 10 days ago

Commit

9b3eb99

1 Parent(s): 054d245

add debug

Browse files

Files changed (1) hide show

app.py +129 -118

app.py CHANGED Viewed

@@ -84,7 +84,7 @@ MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
 EXAMPLE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/example_data")
 DTYPE = torch.float16
-DEVICE = "cuda"
 VALID_RATIO_THRESHOLD = 0.005
 CROP_SIZE = 518
 work_space = None
@@ -220,45 +220,51 @@ def run_segmentation(
     image_prompts: Any,
     polygon_refinement: bool = True,
 ) -> Image.Image:
-    rgb_image = image_prompts["image"].convert("RGB")
-    global work_space
-    global sam2_predictor
-    if sam2_predictor is None:
-        sam2_model = build_sam2(
-            config_file=SAM2_CONFIG,
-            ckpt_path=SAM2_CHECKPOINT,
-        )
-        sam2_predictor = SAM2ImagePredictor(sam2_model)
-    # pre-process the layers and get the xyxy boxes of each layer
-    if len(image_prompts["points"]) == 0:
-        gr.Error("No points provided for segmentation. Please add points to the image.")
-        return None
-    boxes = [
-        [
-            [int(box[0]), int(box[1]), int(box[3]), int(box[4])]
-            for box in image_prompts["points"]
         ]
-    ]
-    detections = segment(
-        sam2_predictor,
-        rgb_image,
-        boxes=[boxes],
-        polygon_refinement=polygon_refinement,
-    )
-    seg_map_pil = plot_segmentation(rgb_image, detections)
-    torch.cuda.empty_cache()
-    cleanup_tmp(TMP_DIR, expire_seconds=3600)
-    work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
-    os.makedirs(work_space, exist_ok=True)
-    seg_map_pil.save(os.path.join(work_space, 'mask.png'))
     return seg_map_pil
@@ -268,92 +274,97 @@ def run_depth_estimation(
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
 ) -> Image.Image:
-    rgb_image = image_prompts["image"].convert("RGB")
-    rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
-    global pipeline
-    pipeline.cuda()
-    global dpt_pack
-    global work_space
-    if work_space is None:
-        work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
-        os.makedirs(work_space, exist_ok=True)
-    global generated_object_map
-    generated_object_map = {}
-    origin_W, origin_H = rgb_image.size
-    if max(origin_H, origin_W) > 1024:
-        factor = max(origin_H, origin_W) / 1024
-        H = int(origin_H // factor)
-        W = int(origin_W // factor)
-        rgb_image = rgb_image.resize((W, H), Image.Resampling.LANCZOS)
-    W, H = rgb_image.size
-    input_image = np.array(rgb_image).astype(np.float32)
-    input_image = torch.tensor(input_image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
-    output = pipeline.models['scene_cond_model'].infer(input_image)
-    depth = output['depth']
-    intrinsics = output['intrinsics']
-    invalid_mask = torch.logical_or(torch.isnan(depth), torch.isinf(depth))
-    depth_mask = ~invalid_mask
-    depth = torch.where(invalid_mask, 0.0, depth)
-    K = torch.from_numpy(
-        np.array([
-            [intrinsics[0, 0].item() * W, 0, 0.5*W],
-            [0, intrinsics[1, 1].item() * H, 0.5*H],
-            [0, 0, 1]
-        ])
-    ).to(dtype=torch.float32, device=DEVICE)
-    dpt_pack = {
-        'c2w': c2w.to(DEVICE),
-        'K': K,
-        'depth_mask': depth_mask,
-        'depth': depth
-    }
-    instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
-    seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
-    seg_image = np.array(seg_image)
-    mask_pack = []
-    for instance_label in instance_labels:
-        if (instance_label == np.array([0, 0, 0])).all():
-            continue
-        else:
-            instance_mask = (seg_image.reshape(-1, 3) == instance_label).all(axis=-1).reshape(H, W)
-            mask_pack.append(instance_mask)
-    fg_mask = torch.from_numpy(np.stack(mask_pack).any(axis=0)).to(DEVICE)
-    scene_est_depth_pts, scene_est_depth_pts_colors = \
-        project2ply(depth_mask, depth, input_image, K, c2w)
-    save_ply_path = os.path.join(work_space, "scene_pcd.glb")
-    fg_depth_pts, _ = \
-        project2ply(fg_mask, depth, input_image, K, c2w)
-    _, trans, scale = normalize_vertices(fg_depth_pts)
-    if trans.shape[0] == 1:
-        trans = trans[0]
-    dpt_pack.update(
-        {
-            "trans": trans,
-            "scale": scale,
-        }
-    )
-    trimesh.PointCloud(scene_est_depth_pts.reshape(-1, 3), scene_est_depth_pts_colors.reshape(-1, 3)).\
-        apply_translation(-trans).apply_scale(1. / (scale + 1e-6)).\
-        apply_transform(rot).export(save_ply_path)
-    torch.cuda.empty_cache()
     return save_ply_path

 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
 EXAMPLE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets/example_data")
 DTYPE = torch.float16
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 VALID_RATIO_THRESHOLD = 0.005
 CROP_SIZE = 518
 work_space = None
     image_prompts: Any,
     polygon_refinement: bool = True,
 ) -> Image.Image:
+    try:
+        rgb_image = image_prompts["image"].convert("RGB")
+        global work_space
+        global sam2_predictor
+        if sam2_predictor is None:
+            sam2_model = build_sam2(
+                config_file=SAM2_CONFIG,
+                ckpt_path=SAM2_CHECKPOINT,
+            )
+            sam2_predictor = SAM2ImagePredictor(sam2_model)
+        # pre-process the layers and get the xyxy boxes of each layer
+        if len(image_prompts["points"]) == 0:
+            gr.Error("No points provided for segmentation. Please add points to the image.")
+            return None
+        boxes = [
+            [
+                [int(box[0]), int(box[1]), int(box[3]), int(box[4])]
+                for box in image_prompts["points"]
+            ]
         ]
+        detections = segment(
+            sam2_predictor,
+            rgb_image,
+            boxes=[boxes],
+            polygon_refinement=polygon_refinement,
+        )
+        seg_map_pil = plot_segmentation(rgb_image, detections)
+        torch.cuda.empty_cache()
+        cleanup_tmp(TMP_DIR, expire_seconds=3600)
+        work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
+        os.makedirs(work_space, exist_ok=True)
+        seg_map_pil.save(os.path.join(work_space, 'mask.png'))
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise gr.Error(f"run_segmentation failed: {e}")
     return seg_map_pil
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
 ) -> Image.Image:
+    try:
+        rgb_image = image_prompts["image"].convert("RGB")
+        rgb_image = rgb_image.resize((1024, 1024), Image.Resampling.LANCZOS)
+        global pipeline
+        pipeline.cuda()
+        global dpt_pack
+        global work_space
+        if work_space is None:
+            work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
+            os.makedirs(work_space, exist_ok=True)
+        global generated_object_map
+        generated_object_map = {}
+        origin_W, origin_H = rgb_image.size
+        if max(origin_H, origin_W) > 1024:
+            factor = max(origin_H, origin_W) / 1024
+            H = int(origin_H // factor)
+            W = int(origin_W // factor)
+            rgb_image = rgb_image.resize((W, H), Image.Resampling.LANCZOS)
+        W, H = rgb_image.size
+        input_image = np.array(rgb_image).astype(np.float32)
+        input_image = torch.tensor(input_image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
+        output = pipeline.models['scene_cond_model'].infer(input_image)
+        depth = output['depth']
+        intrinsics = output['intrinsics']
+        invalid_mask = torch.logical_or(torch.isnan(depth), torch.isinf(depth))
+        depth_mask = ~invalid_mask
+        depth = torch.where(invalid_mask, 0.0, depth)
+        K = torch.from_numpy(
+            np.array([
+                [intrinsics[0, 0].item() * W, 0, 0.5*W],
+                [0, intrinsics[1, 1].item() * H, 0.5*H],
+                [0, 0, 1]
+            ])
+        ).to(dtype=torch.float32, device=DEVICE)
+        dpt_pack = {
+            'c2w': c2w.to(DEVICE),
+            'K': K,
+            'depth_mask': depth_mask,
+            'depth': depth
+        }
+        instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
+        seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
+        seg_image = np.array(seg_image)
+        mask_pack = []
+        for instance_label in instance_labels:
+            if (instance_label == np.array([0, 0, 0])).all():
+                continue
+            else:
+                instance_mask = (seg_image.reshape(-1, 3) == instance_label).all(axis=-1).reshape(H, W)
+                mask_pack.append(instance_mask)
+        fg_mask = torch.from_numpy(np.stack(mask_pack).any(axis=0)).to(DEVICE)
+        scene_est_depth_pts, scene_est_depth_pts_colors = \
+            project2ply(depth_mask, depth, input_image, K, c2w)
+        save_ply_path = os.path.join(work_space, "scene_pcd.glb")
+        fg_depth_pts, _ = \
+            project2ply(fg_mask, depth, input_image, K, c2w)
+        _, trans, scale = normalize_vertices(fg_depth_pts)
+        if trans.shape[0] == 1:
+            trans = trans[0]
+        dpt_pack.update(
+            {
+                "trans": trans,
+                "scale": scale,
+            }
+        )
+        trimesh.PointCloud(scene_est_depth_pts.reshape(-1, 3), scene_est_depth_pts_colors.reshape(-1, 3)).\
+            apply_translation(-trans).apply_scale(1. / (scale + 1e-6)).\
+            apply_transform(rot).export(save_ply_path)
+        torch.cuda.empty_cache()
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise gr.Error(f"run_depth_estimation failed: {e}")
     return save_ply_path