Spaces:

HorizonRobotics
/

3D-Fixer

Running on Zero

App Files Files Community

JasonYinnnn committed 14 days ago

Commit

a0fbf94

1 Parent(s): a08e831

use gr.State

Browse files

Files changed (1) hide show

app.py +58 -53

app.py CHANGED Viewed

@@ -39,8 +39,7 @@ DTYPE = torch.float16
 DEVICE = "cpu"
 VALID_RATIO_THRESHOLD = 0.005
 CROP_SIZE = 518
-work_space = None
-dpt_pack = None
 generated_object_map = {}
 ############## 3D-Fixer model
@@ -148,7 +147,6 @@ def run_segmentation(
 ) -> Image.Image:
     rgb_image = image_prompts["image"].convert("RGB")
-    global work_space
     global sam_segmentator
     device = "cpu"
@@ -176,16 +174,19 @@ def run_segmentation(
         seg_map_pil = plot_segmentation(rgb_image, detections)
     cleanup_tmp(TMP_DIR, expire_seconds=3600)
-    work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
-    os.makedirs(work_space, exist_ok=True)
-    seg_map_pil.save(os.path.join(work_space, 'mask.png'))
-    return seg_map_pil
 @spaces.GPU
 def run_depth_estimation(
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
 ) -> Image.Image:
     rgb_image = image_prompts["image"].convert("RGB")
@@ -201,14 +202,11 @@ def run_depth_estimation(
     dtype = torch.float16 if device == 'cuda' else torch.float32
     moge_v2_dpt_model = moge_v2_dpt_model.to(device=device, dtype=dtype)
-    global dpt_pack
-    global work_space
     if work_space is None:
-        work_space = os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}")
-        os.makedirs(work_space, exist_ok=True)
-    global generated_object_map
-    generated_object_map = {}
     origin_W, origin_H = rgb_image.size
     if max(origin_H, origin_W) > 1024:
@@ -238,12 +236,12 @@ def run_depth_estimation(
         ])
     ).to(dtype=torch.float32, device=device)
-    dpt_pack = {
-        'c2w': c2w,
-        'K': K,
-        'depth_mask': depth_mask,
-        'depth': depth
-    }
     instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
     seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
@@ -260,7 +258,7 @@ def run_depth_estimation(
     scene_est_depth_pts, scene_est_depth_pts_colors = \
         project2ply(depth_mask.to(device), depth.to(device), input_image.to(device), K.to(device), c2w.to(device))
-    save_ply_path = os.path.join(work_space, "scene_pcd.glb")
     fg_depth_pts, _ = \
         project2ply(fg_mask.to(device), depth.to(device), input_image.to(device), K.to(device), c2w.to(device))
@@ -269,22 +267,22 @@ def run_depth_estimation(
     if trans.shape[0] == 1:
         trans = trans[0]
-    dpt_pack.update(
         {
             "trans": trans,
             "scale": scale,
         }
     )
-    for k, v in dpt_pack.items():
         if isinstance(v, torch.Tensor):
-            dpt_pack[k] = v.to('cpu')
     trimesh.PointCloud(scene_est_depth_pts.reshape(-1, 3), scene_est_depth_pts_colors.reshape(-1, 3)).\
         apply_translation(-trans).apply_scale(1. / (scale + 1e-6)).\
         apply_transform(rot).export(save_ply_path)
-    return save_ply_path
 def save_image(img, save_path):
@@ -307,7 +305,7 @@ def export_scene_glb(trimeshes, work_space, scene_name):
 def get_duration(rgb_image, seg_image, seed, randomize_seed,
                  num_inference_steps, guidance_scale, cfg_interval_start,
-                 cfg_interval_end, t_rescale):
     instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
     step_duration = 15.0
     return instance_labels.shape[0] * step_duration + 60
@@ -323,8 +321,16 @@ def run_generation(
     cfg_interval_start: float = 0.5,
     cfg_interval_end: float = 1.0,
     t_rescale: float = 3.0,
 ):
     from threeDFixer.pipelines import ThreeDFixerPipeline
     from threeDFixer.datasets.utils import (
         edge_mask_morph_gradient,
@@ -377,9 +383,6 @@ def run_generation(
         return instance_glb_path, glb
-    global dpt_pack
-    global work_space
-    global generated_object_map
     generated_object_map = {}
     run_id = str(uuid.uuid4())
@@ -397,11 +400,11 @@ def run_generation(
         seed = random.randint(0, MAX_SEED)
     set_random_seed(seed)
-    H, W = dpt_pack['depth_mask'].shape
     rgb_image = rgb_image.resize((W, H), Image.Resampling.LANCZOS)
     seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
-    depth_mask = dpt_pack['depth_mask'].detach().cpu().numpy() > 0
     seg_image = np.array(seg_image)
     mask_pack = []
@@ -416,8 +419,8 @@ def run_generation(
     results = []
     trimeshes = []
-    trans = dpt_pack['trans']
-    scale = dpt_pack['scale']
     current_scene_path = None
     pending_exports = []
@@ -534,7 +537,7 @@ def run_generation(
             trimeshes.append(glb)
             current_scene_path = export_scene_glb(
                 trimeshes=trimeshes,
-                work_space=work_space,
                 scene_name=f"{run_id}_scene_step_{len(trimeshes)}.glb",
             )
             any_update = True
@@ -566,12 +569,12 @@ def run_generation(
                 if flushed is not None:
                     yield flushed
-                est_depth = dpt_pack['depth'].to('cpu')
-                c2w = dpt_pack['c2w'].to('cpu')
-                K = dpt_pack['K'].to('cpu')
-                intrinsics = dpt_pack['K'].float().to(DEVICE)
-                extrinsics = copy.deepcopy(dpt_pack['c2w']).float().to(DEVICE)
                 extrinsics[:3, 1:3] *= -1
                 object_mask = object_mask > 0
@@ -590,12 +593,12 @@ def run_generation(
                 instance_image, instance_mask, instance_rays_o, instance_rays_d, instance_rays_c, \
                     instance_rays_t = process_instance_image(image, instance_mask, color_mask, est_depth, K, c2w, CROP_SIZE)
-                save_image(scene_image, os.path.join(work_space, f'input_scene_image_{instance_name}.png'))
-                save_image(scene_image_masked, os.path.join(work_space, f'input_scene_image_masked_{instance_name}.png'))
-                save_image(instance_image, os.path.join(work_space, f'input_instance_image_{instance_name}.png'))
                 save_image(
                     torch.cat([instance_image, instance_mask]),
-                    os.path.join(work_space, f'input_instance_image_masked_{instance_name}.png')
                 )
                 pcd_points = (
@@ -607,7 +610,7 @@ def run_generation(
                 save_projected_colored_pcd(
                     pcd_points,
                     repeat(pcd_colors, 'n -> n c', c=3),
-                    f"{work_space}/instance_est_depth_{instance_name}.ply"
                 )
                 with torch.no_grad():
@@ -634,10 +637,10 @@ def run_generation(
                     )
                 mp4_path = os.path.abspath(
-                    os.path.join(work_space, f"{run_id}_instance_gs_fine_{instance_name}.mp4")
                 )
                 poster_path = os.path.abspath(
-                    os.path.join(work_space, f"{run_id}_instance_gs_fine_{instance_name}.png")
                 )
                 video = render_utils.render_video(
@@ -678,7 +681,7 @@ def run_generation(
                     trans=trans,
                     scale=scale,
                     rot=rot,
-                    work_space=work_space,
                     instance_name=instance_name,
                     run_id=run_id,
                 )
@@ -708,7 +711,7 @@ def run_generation(
     if len(ready_items) > 0:
         final_scene_path = export_scene_glb(
             trimeshes=trimeshes,
-            work_space=work_space,
             scene_name=f"{run_id}_scene_final.glb",
         )
@@ -740,6 +743,7 @@ def update_single_download(selected_name):
 # Demo
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
     with gr.Column():
@@ -812,7 +816,6 @@ with gr.Blocks() as demo:
         with gr.Row():
             gr.Examples(
                 examples=EXAMPLES,
-                fn=run_generation,
                 inputs=[image_prompts, seg_image, seed, randomize_seed, num_inference_steps, guidance_scale, cfg_interval_start, cfg_interval_end, t_rescale],
                 outputs=[model_output, download_glb, seed],
                 cache_examples=False,
@@ -824,16 +827,17 @@ with gr.Blocks() as demo:
             image_prompts,
             polygon_refinement,
         ],
-        outputs=[seg_image],
     ).then(lambda: gr.Button(interactive=True), outputs=[dpt_button])
     dpt_button.click(
         run_depth_estimation,
         inputs=[
             image_prompts,
-            seg_image
         ],
-        outputs=[dpt_model_output],
     ).then(lambda: gr.Button(interactive=True), outputs=[gen_button])
     gen_button.click(
@@ -847,7 +851,8 @@ with gr.Blocks() as demo:
             guidance_scale,
             cfg_interval_start,
             cfg_interval_end,
-            t_rescale
         ],
         outputs=[model_output,
                  stream_output,

 DEVICE = "cpu"
 VALID_RATIO_THRESHOLD = 0.005
 CROP_SIZE = 518
+work_space = None
 generated_object_map = {}
 ############## 3D-Fixer model
 ) -> Image.Image:
     rgb_image = image_prompts["image"].convert("RGB")
     global sam_segmentator
     device = "cpu"
         seg_map_pil = plot_segmentation(rgb_image, detections)
     cleanup_tmp(TMP_DIR, expire_seconds=3600)
+    work_space = {
+        "dir": os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}"),
+    }
+    os.makedirs(work_space["dir"], exist_ok=True)
+    seg_map_pil.save(os.path.join(work_space["dir"], "mask.png"))
+    return seg_map_pil, work_space
 @spaces.GPU
 def run_depth_estimation(
     image_prompts: Any,
     seg_image: Union[str, Image.Image],
+    work_space: dict,
 ) -> Image.Image:
     rgb_image = image_prompts["image"].convert("RGB")
     dtype = torch.float16 if device == 'cuda' else torch.float32
     moge_v2_dpt_model = moge_v2_dpt_model.to(device=device, dtype=dtype)
     if work_space is None:
+        work_space = {
+            "dir": os.path.join(TMP_DIR, f"work_space_{uuid.uuid4()}"),
+        }
+        os.makedirs(work_space["dir"], exist_ok=True)
     origin_W, origin_H = rgb_image.size
     if max(origin_H, origin_W) > 1024:
         ])
     ).to(dtype=torch.float32, device=device)
+    work_space.update({
+        "c2w": c2w,
+        "K": K,
+        "depth_mask": depth_mask,
+        "depth": depth,
+    })
     instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
     seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
     scene_est_depth_pts, scene_est_depth_pts_colors = \
         project2ply(depth_mask.to(device), depth.to(device), input_image.to(device), K.to(device), c2w.to(device))
+    save_ply_path = os.path.join(work_space["dir"], "scene_pcd.glb")
     fg_depth_pts, _ = \
         project2ply(fg_mask.to(device), depth.to(device), input_image.to(device), K.to(device), c2w.to(device))
     if trans.shape[0] == 1:
         trans = trans[0]
+    work_space.update(
         {
             "trans": trans,
             "scale": scale,
         }
     )
+    for k, v in work_space.items():
         if isinstance(v, torch.Tensor):
+            work_space[k] = v.to('cpu')
     trimesh.PointCloud(scene_est_depth_pts.reshape(-1, 3), scene_est_depth_pts_colors.reshape(-1, 3)).\
         apply_translation(-trans).apply_scale(1. / (scale + 1e-6)).\
         apply_transform(rot).export(save_ply_path)
+    return save_ply_path, work_space
 def save_image(img, save_path):
 def get_duration(rgb_image, seg_image, seed, randomize_seed,
                  num_inference_steps, guidance_scale, cfg_interval_start,
+                 cfg_interval_end, t_rescale, work_space):
     instance_labels = np.unique(np.array(seg_image).reshape(-1, 3), axis=0)
     step_duration = 15.0
     return instance_labels.shape[0] * step_duration + 60
     cfg_interval_start: float = 0.5,
     cfg_interval_end: float = 1.0,
     t_rescale: float = 3.0,
+    work_space: dict = None,
 ):
+    if work_space is None:
+        raise gr.Error("Please run step 1 and step 2 first.")
+    required_keys = ["dir", "depth_mask", "depth", "K", "c2w", "trans", "scale"]
+    missing = [k for k in required_keys if k not in work_space]
+    if missing:
+        raise gr.Error(f"Missing workspace fields: {missing}. Please run depth estimation (step 2) first.")
     from threeDFixer.pipelines import ThreeDFixerPipeline
     from threeDFixer.datasets.utils import (
         edge_mask_morph_gradient,
         return instance_glb_path, glb
     generated_object_map = {}
     run_id = str(uuid.uuid4())
         seed = random.randint(0, MAX_SEED)
     set_random_seed(seed)
+    H, W = work_space['depth_mask'].shape
     rgb_image = rgb_image.resize((W, H), Image.Resampling.LANCZOS)
     seg_image = seg_image.resize((W, H), Image.Resampling.LANCZOS)
+    depth_mask = work_space['depth_mask'].detach().cpu().numpy() > 0
     seg_image = np.array(seg_image)
     mask_pack = []
     results = []
     trimeshes = []
+    trans = work_space['trans']
+    scale = work_space['scale']
     current_scene_path = None
     pending_exports = []
             trimeshes.append(glb)
             current_scene_path = export_scene_glb(
                 trimeshes=trimeshes,
+                work_space=work_space['dir'],
                 scene_name=f"{run_id}_scene_step_{len(trimeshes)}.glb",
             )
             any_update = True
                 if flushed is not None:
                     yield flushed
+                est_depth = work_space['depth'].to('cpu')
+                c2w = work_space['c2w'].to('cpu')
+                K = work_space['K'].to('cpu')
+                intrinsics = work_space['K'].float().to(DEVICE)
+                extrinsics = copy.deepcopy(work_space['c2w']).float().to(DEVICE)
                 extrinsics[:3, 1:3] *= -1
                 object_mask = object_mask > 0
                 instance_image, instance_mask, instance_rays_o, instance_rays_d, instance_rays_c, \
                     instance_rays_t = process_instance_image(image, instance_mask, color_mask, est_depth, K, c2w, CROP_SIZE)
+                save_image(scene_image, os.path.join(work_space['dir'], f'input_scene_image_{instance_name}.png'))
+                save_image(scene_image_masked, os.path.join(work_space['dir'], f'input_scene_image_masked_{instance_name}.png'))
+                save_image(instance_image, os.path.join(work_space['dir'], f'input_instance_image_{instance_name}.png'))
                 save_image(
                     torch.cat([instance_image, instance_mask]),
+                    os.path.join(work_space['dir'], f'input_instance_image_masked_{instance_name}.png')
                 )
                 pcd_points = (
                 save_projected_colored_pcd(
                     pcd_points,
                     repeat(pcd_colors, 'n -> n c', c=3),
+                    f"{work_space['dir']}/instance_est_depth_{instance_name}.ply"
                 )
                 with torch.no_grad():
                     )
                 mp4_path = os.path.abspath(
+                    os.path.join(work_space['dir'], f"{run_id}_instance_gs_fine_{instance_name}.mp4")
                 )
                 poster_path = os.path.abspath(
+                    os.path.join(work_space['dir'], f"{run_id}_instance_gs_fine_{instance_name}.png")
                 )
                 video = render_utils.render_video(
                     trans=trans,
                     scale=scale,
                     rot=rot,
+                    work_space=work_space['dir'],
                     instance_name=instance_name,
                     run_id=run_id,
                 )
     if len(ready_items) > 0:
         final_scene_path = export_scene_glb(
             trimeshes=trimeshes,
+            work_space=work_space['dir'],
             scene_name=f"{run_id}_scene_final.glb",
         )
 # Demo
 with gr.Blocks() as demo:
+    gr_work_space = gr.State(value=None)
     gr.Markdown(MARKDOWN)
     with gr.Column():
         with gr.Row():
             gr.Examples(
                 examples=EXAMPLES,
                 inputs=[image_prompts, seg_image, seed, randomize_seed, num_inference_steps, guidance_scale, cfg_interval_start, cfg_interval_end, t_rescale],
                 outputs=[model_output, download_glb, seed],
                 cache_examples=False,
             image_prompts,
             polygon_refinement,
         ],
+        outputs=[seg_image, gr_work_space],
     ).then(lambda: gr.Button(interactive=True), outputs=[dpt_button])
     dpt_button.click(
         run_depth_estimation,
         inputs=[
             image_prompts,
+            seg_image,
+            gr_work_space
         ],
+        outputs=[dpt_model_output, gr_work_space],
     ).then(lambda: gr.Button(interactive=True), outputs=[gen_button])
     gen_button.click(
             guidance_scale,
             cfg_interval_start,
             cfg_interval_end,
+            t_rescale,
+            gr_work_space
         ],
         outputs=[model_output,
                  stream_output,