| from spaces import GPU |
| import torch |
|
|
def dummy_warmup():
    """Place a one-element tensor on the CUDA device when CUDA is available.

    Prints an info line before the allocation, or a warning (and does
    nothing else) when ``torch.cuda.is_available()`` is false.
    """
    if not torch.cuda.is_available():
        print("[WARNING] CUDA not available. Skipping warmup.")
        return

    print("[INFO] CUDA is available. Running warmup.")
    # The tensor itself is not needed; only the device allocation matters.
    torch.tensor([1.0]).cuda()
|
|
| import os, shutil |
| import numpy as np |
| import gradio as gr |
| import rembg |
| import trimesh |
| from moge.model.v1 import MoGeModel |
| from utils.geometry import compute_pointmap |
| import cv2 |
| from huggingface_hub import hf_hub_download |
| from PIL import Image |
| import matplotlib.pyplot as plt |
| from eval_wrapper.eval import EvalWrapper, eval_scene |
|
|
|
|
# Device string shared by every model and inference call in this module.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Working directory for intermediate inputs and the exported .glb files.
outdir = "/tmp/rayst3r"

print("Loading MoGe model")
moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(device)

# DINOv2 backbone, switched to eval mode and moved to the selected device.
dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
dino_model = dino_model.eval().to(device)

print("Loading RaySt3R model")
rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
# The wrapper is constructed on the CPU first and only then moved to `device`.
rayst3r_model = EvalWrapper(rayst3r_checkpoint, device='cpu').to(device)
print("Loaded rayst3r_model")
|
|
|
|
|
|
|
|
| |
|
|
|
|
| def depth2uint16(depth): |
| return depth * torch.iinfo(torch.uint16).max / 10.0 |
| |
def save_tensor_as_png(tensor: torch.Tensor, path: str, dtype: torch.dtype | None = None):
    """Write a tensor to ``path`` as an image via PIL.

    Args:
        tensor: Tensor holding the image data.
        path: Destination file path handed to ``Image.save``.
        dtype: Dtype to cast to before saving; the tensor's own dtype is
            used when this is ``None``.
    """
    target_dtype = tensor.dtype if dtype is None else dtype
    array = tensor.to(target_dtype).cpu().numpy()
    Image.fromarray(array).save(path)
|
|
def colorize_points_with_turbo_all_dims(points, method='norm',cmap='turbo'):
    """
    Assigns colors to 3D points from a colormap, based on a scalar computed from all 3 dimensions.

    Args:
        points (np.ndarray): (N, 3) array of 3D points. N may be 0.
        method (str): Method for reducing each 3D point to a scalar. Options:
            'norm' (Euclidean norm of the point) or 'pca' (projection of the
            mean-centered point onto the first right-singular vector).
        cmap (str): Name of the matplotlib colormap to sample (default 'turbo').

    Returns:
        np.ndarray: (N, 3) RGB colors in [0, 1].

    Raises:
        ValueError: If `method` is not 'norm' or 'pca'.
    """
    assert points.shape[1] == 3, "Input must be of shape (N, 3)"

    # Validate up front so an unknown method is reported even for empty input.
    if method not in ('norm', 'pca'):
        raise ValueError(f"Unknown method '{method}'")

    # min()/max() below raise on zero-size arrays; an empty cloud simply has
    # no colors.
    if points.shape[0] == 0:
        return np.empty((0, 3), dtype=np.float64)

    if method == 'norm':
        scalar = np.linalg.norm(points, axis=1)
    else:
        centered = points - points.mean(axis=0)
        # Only the right-singular vectors are needed for the projection.
        _, _, vh = np.linalg.svd(centered, full_matrices=False)
        scalar = centered @ vh[0]

    # Rescale to [0, 1]; the epsilon avoids 0/0 when all scalars are equal.
    scalar_min, scalar_max = scalar.min(), scalar.max()
    normalized = (scalar - scalar_min) / (scalar_max - scalar_min + 1e-8)

    # The colormap returns RGBA; drop the alpha channel.
    colormap = plt.colormaps.get_cmap(cmap)
    return colormap(normalized)[:, :3]
|
|
def prep_for_rayst3r(img,depth_dict,mask):
    """Write the RaySt3R input files into ``<outdir>/input``.

    Any existing ``<outdir>/input`` directory is removed first. The files
    written are ``intrinsics.pt``, ``depth.png`` (uint16), ``mask.png``
    (bool) and ``rgb.png``.

    Args:
        img: NumPy image array; its first two dimensions are read as (H, W).
        depth_dict: Mapping with tensor entries ``"intrinsics"`` and
            ``"depth"``. It is not modified by this function.
        mask: NumPy array converted to a boolean mask.
    """
    H, W = img.shape[:2]

    # clone() is required: detach() always shares storage and cpu() returns
    # the same tensor when it already lives on the CPU, so the in-place
    # scaling below would otherwise rewrite depth_dict["intrinsics"].
    intrinsics = depth_dict["intrinsics"].detach().cpu().clone()
    # Rows 0 and 1 are scaled by width and height — presumably the intrinsics
    # arrive normalized by image size; TODO confirm against the depth model.
    intrinsics[0] *= W
    intrinsics[1] *= H

    # Start from an empty input directory on every call.
    input_dir = os.path.join(outdir, "input")
    if os.path.exists(input_dir):
        shutil.rmtree(input_dir)
    os.makedirs(input_dir, exist_ok=True)

    torch.save(intrinsics, os.path.join(input_dir, "intrinsics.pt"))

    # Depth is rescaled (10.0 -> uint16 max) and stored as a 16-bit image.
    depth = depth_dict["depth"].cpu()
    depth = depth2uint16(depth)
    save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)

    save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)

    save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
|
|
@GPU(duration = 120)
def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
    """Run RaySt3R on the given view and export the predicted points as a .glb.

    Args:
        img: Image array, forwarded to ``prep_for_rayst3r``.
        depth_dict: Depth/intrinsics mapping, forwarded to ``prep_for_rayst3r``.
        mask: Foreground mask, forwarded to ``prep_for_rayst3r``.
        max_total_points: Upper bound on the number of exported points. May be
            a float (the default is ``10e6``); it is truncated to an int.
        rotated: Accepted for interface compatibility; not used here.

    Returns:
        str: Path of the written ``rayst3r.glb`` inside ``outdir``.
    """
    prep_for_rayst3r(img,depth_dict,mask)

    input_dir = os.path.join(outdir, "input")
    rayst3r_points = eval_scene(rayst3r_model,input_dir,do_filter_all_masks=True,dino_model=dino_model, device = device,set_conf=10).cpu()

    total_points = rayst3r_points.shape[0]
    # int() matters: with the float default, min() returns a float whenever
    # the cloud exceeds the cap, and a float slice bound raises TypeError.
    n_points = int(min(max_total_points, total_points))
    # Random subset (without replacement) of at most n_points points.
    keep = torch.randperm(total_points)[:n_points]
    rayst3r_points = rayst3r_points[keep].numpy()

    # Negate the Y and Z axes — presumably to match the viewer's axis
    # convention; TODO confirm.
    rayst3r_points[:,1] = -rayst3r_points[:,1]
    rayst3r_points[:,2] = -rayst3r_points[:,2]

    colors = colorize_points_with_turbo_all_dims(rayst3r_points)

    scene = trimesh.Scene()
    pct = trimesh.PointCloud(rayst3r_points, colors=colors, radius=0.01)
    scene.add_geometry(pct)

    outfile = os.path.join(outdir, "rayst3r.glb")
    scene.export(outfile)
    return outfile
|
|
|
|
def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
    """Export the masked, colored input point cloud as ``input.glb``.

    Args:
        outdir: Directory the .glb file is written to.
        img: Image array; its first two dimensions are read as (H, W) and its
            pixels provide the point colors.
        depth_dict: Mapping with tensor entries ``"intrinsics"`` and
            ``"depth"``. It is not modified by this function.
        mask: Boolean array selecting, row by row, which pixels are exported.
        rotated: Accepted for interface compatibility; not used here.

    Returns:
        str: Path of the written ``input.glb``.
    """
    H, W = img.shape[:2]

    # copy() is required: for a CPU tensor, .cpu().numpy() is a view onto the
    # tensor's memory, so the in-place scaling below would rewrite
    # depth_dict["intrinsics"] and any later consumer would scale it again.
    intrinsics = depth_dict["intrinsics"].detach().cpu().numpy().copy()
    # Rows 0 and 1 are scaled by width and height — presumably the intrinsics
    # arrive normalized by image size; TODO confirm against the depth model.
    intrinsics[0] *= W
    intrinsics[1] *= H

    depth = depth_dict["depth"].detach().cpu().numpy()
    # Identity pose: the points are expressed in the camera frame.
    cam2world = np.eye(4)
    points_world = compute_pointmap(depth, cam2world, intrinsics)

    scene = trimesh.Scene()
    # Keep only the masked pixels, row by row, for both points and colors.
    pts = np.concatenate([p[m] for p,m in zip(points_world,mask)])
    col = np.concatenate([c[m] for c,m in zip(img,mask)])

    pts = pts.reshape(-1,3)
    # Negate the Y and Z axes — presumably to match the viewer's axis
    # convention; TODO confirm.
    pts[:,1] = -pts[:,1]
    pts[:,2] = -pts[:,2]

    pct = trimesh.PointCloud(pts, colors=col.reshape(-1,3))
    scene.add_geometry(pct)

    outfile = os.path.join(outdir, "input.glb")
    scene.export(outfile)
    return outfile
|
|
@GPU(duration = 10)
def depth_moge(input_img):
    """Run MoGe inference on an image and return the model's raw output.

    The image is divided by 255, converted to a float32 tensor on ``device``
    and permuted from (H, W, C) to (C, H, W) before being passed to
    ``moge_model.infer``.
    """
    scaled = input_img / 255
    chw = torch.tensor(scaled, dtype=torch.float32, device=device).permute(2, 0, 1)
    return moge_model.infer(chw)
|
|
@GPU(duration = 10)
def mask_rembg(input_img):
    """Segment the image with rembg and return ``(mask, rgb)``.

    Returns:
        tuple: ``mask`` is a boolean array that is true where the eroded
        alpha channel is at least 128; ``rgb`` holds the first three
        channels of the rembg output.
    """
    cutout = np.array(
        rembg.remove(input_img, alpha_matting=False, post_process_mask=True)
    )

    # One pass of a 3x3 erosion on the alpha channel, written back in place.
    structuring = np.ones((3, 3), np.uint8)
    cutout[..., 3] = cv2.erode(cutout[..., 3], structuring, iterations=1)

    rgb = cutout[:, :, :3]
    mask = cutout[:, :, -1] >= 128
    return mask, rgb
|
|
@GPU(duration = 10)
def process_image(input_img):
    """Turn one input image into the two .glb files shown in the UI.

    Args:
        input_img: Either a path to an image file (what
            ``gr.Image(type="filepath")`` delivers) or an already decoded
            (H, W, 3) RGB array.

    Returns:
        tuple[str, str]: Paths of the input point cloud .glb and the RaySt3R
        prediction .glb.

    Raises:
        gr.Error: If no image was provided.
    """
    # NOTE(review): this function has a 10 s GPU budget while it calls
    # rayst3r_to_glb, which requests 120 s — confirm how nested GPU-decorated
    # calls are budgeted.
    if input_img is None:
        raise gr.Error("Please upload an image before clicking Generate.")

    # A file path has no .shape; decode it to an RGB array first. Arrays are
    # passed through untouched, so array callers keep working.
    if isinstance(input_img, (str, os.PathLike)):
        with Image.open(input_img) as pil_img:
            input_img = np.array(pil_img.convert("RGB"))

    # Portrait images are rotated to landscape before the fixed-size resize.
    rotated = False
    if input_img.shape[0] > input_img.shape[1]:
        input_img = cv2.rotate(input_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        rotated = True

    input_img = cv2.resize(input_img, (640, 480))
    mask, rgb = mask_rembg(input_img)
    depth_dict = depth_moge(input_img)

    # Start every request from an empty working directory.
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)

    input_glb = input_to_glb(outdir,input_img,depth_dict,mask,rotated=rotated)
    inference_glb = rayst3r_to_glb(input_img,depth_dict,mask,rotated=rotated)

    return input_glb, inference_glb
|
|
|
|
# Gradio UI: one image input, two 3D viewers, a button and a cached example.
with gr.Blocks(delete_cache=(600, 600)) as demo:
    gr.Markdown("""
    ## Image to 3D in cluttered scene with [RaySt3R](https://rayst3r.github.io/)
    * Upload an image and click generate.
    * We mask the input using [Rembg](https://github.com/danielgatis/rembg) and generate depth with [MoGe](https://github.com/microsoft/MoGe).
    * The input point cloud will be visible in 'input', the output is visible in the "RaySt3R" section.

    [Project Website](https://rayst3r.github.io/)
    """)

    # Input image on the left, then the input cloud and the prediction.
    with gr.Row():
        input_image = gr.Image(type="filepath")
        input_model = gr.Model3D(label="Input")
        output_model = gr.Model3D(label="RaySt3R")

    generate_btn = gr.Button("Generate")

    # The button runs the full pipeline and fills both viewers.
    generate_btn.click(
        inputs=[input_image],
        outputs=[input_model, output_model],
        fn=process_image,
    )

    # Bundled example; its outputs are cached.
    gr.Examples(
        fn=process_image,
        examples=[
            ["example_scene/ycb.png"],
        ],
        inputs=[input_image],
        outputs=[input_model, output_model],
        cache_examples=True,
    )


# Script entry point: warm up the device, then serve the demo.
if __name__ == "__main__":
    dummy_warmup()
    demo.launch()
|
|
|
|