"""Gradio demo for Pixel-Perfect Depth.

Loads the Pixel-Perfect Depth model and a MoGe model at import time, then
serves a web UI that turns an uploaded image into a colored depth map, a raw
depth ``.npy`` file, and a point cloud (``.ply`` plus a ``.glb`` for the 3D
viewer).
"""
import os
import tempfile

import gradio as gr
import cv2
import matplotlib
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
import open3d as o3d
import trimesh  # used by on_submit to export the point cloud as .glb
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download

from ppd.utils.set_seed import set_seed
from ppd.utils.align_depth_func import recover_metric_depth_ransac
from ppd.utils.depth2pcd import depth2pcd
from moge.model.v2 import MoGeModel
from ppd.models.ppd import PixelPerfectDepth

# `spaces` is only present on Hugging Face Spaces; fall back gracefully.
try:
    import spaces
    HUGGINFACE_SPACES_INSTALLED = True
except ImportError:
    HUGGINFACE_SPACES_INSTALLED = False

css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 100vh;
}
#img-display-output {
    max-height: 100vh;
}
#download {
    height: 62px;
}
#img-display-output .image-slider-image {
    object-fit: contain !important;
    width: 100% !important;
    height: 100% !important;
}
"""

set_seed(666)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Default number of sampling steps; also the initial value of the UI slider.
default_steps = 10

EXAMPLES_DIR = 'assets/examples'

model = PixelPerfectDepth(sampling_steps=default_steps)
ckpt_path = hf_hub_download(
    repo_id="gangweix/Pixel-Perfect-Depth",
    filename="ppd.pth",
    repo_type="model"
)
# NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
state_dict = torch.load(ckpt_path, map_location="cpu")
model.load_state_dict(state_dict, strict=False)
model = model.eval()

moge_model = MoGeModel.from_pretrained("Ruicheng/moge-2-vitl-normal").eval()


def _new_temp_path(suffix):
    """Create an empty, closed temporary file and return its path.

    The file is kept on disk (``delete=False``) so that it can be written by
    path afterwards and served by Gradio; the handle itself is closed right
    away so no file descriptor is leaked per request.
    """
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        return handle.name


def main(share=True):
    """Build the Gradio interface and launch the demo.

    Args:
        share: Forwarded to ``launch(share=...)``.
    """
    print("Initializing Pixel-Perfect Depth Demo...")

    cmap = matplotlib.colormaps.get_cmap('Spectral')

    title = "# Pixel-Perfect Depth"
    description = """Official demo for **Pixel-Perfect Depth**. 
Please refer to our [paper](), [project page](https://pixel-perfect-depth.github.io), and [github](https://github.com/gangweix/pixel-perfect-depth) for more details."""

    # Use the Spaces GPU decorator when available, otherwise a no-op.
    gpu_task = spaces.GPU if HUGGINFACE_SPACES_INSTALLED else (lambda fn: fn)

    @gpu_task
    def predict_depth(image, denoise_steps):
        """Run Pixel-Perfect Depth on ``image``.

        Returns the ``(depth, resize_image)`` pair produced by
        ``model.infer_image``.
        """
        global model
        # Moved to the device lazily, inside the (possibly GPU-scoped) call.
        model = model.to(DEVICE)
        depth, resize_image = model.infer_image(image, sampling_steps=denoise_steps)
        return depth, resize_image

    @gpu_task
    def predict_moge_depth(image):
        """Run MoGe on ``image`` and return ``(metric_depth, mask, intrinsics)``.

        The image is converted with ``COLOR_BGR2RGB``, scaled by 1/255 and
        permuted to channel-first before inference. Pixels outside ``mask``
        are overwritten with the largest masked depth value.
        """
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = torch.tensor(image / 255, dtype=torch.float32, device=DEVICE).permute(2, 0, 1)
        global moge_model
        moge_model = moge_model.to(DEVICE)
        metric_depth, mask, intrinsics = moge_model.infer(image)
        metric_depth[~mask] = metric_depth[mask].max()
        return metric_depth, mask, intrinsics

    def on_submit(image, denoise_steps=default_steps, apply_filter=True):
        """Produce all demo outputs for one input image.

        Args:
            image: RGB numpy image from the ``gr.Image`` input.
            denoise_steps: Number of sampling steps for the depth model.
            apply_filter: Whether to drop statistical outliers from the
                point cloud.

        The last two parameters have defaults so that ``gr.Examples``, which
        supplies only the image, can call this function too.

        Returns:
            ``[(image, colored_depth), glb_path, concat_png_path,
            raw_depth_npy_path, ply_path]``, matching the ``outputs`` lists
            wired up below.

        Raises:
            gr.Error: If no image was provided.
        """
        if image is None:
            raise gr.Error("Please provide an input image.")

        H, W = image.shape[:2]
        # Channel order is reversed (RGB -> BGR) before inference.
        ppd_depth, resize_image = predict_depth(image[:, :, ::-1], int(denoise_steps))

        resize_H, resize_W = resize_image.shape[:2]

        # moge provide metric depth and intrinsics
        moge_depth, mask, intrinsics = predict_moge_depth(resize_image)

        # relative depth -> metric depth
        metric_depth = recover_metric_depth_ransac(ppd_depth, moge_depth, mask)
        # Scale focal lengths / principal point by the resized image size
        # (presumably MoGe returns normalized intrinsics -- TODO confirm).
        intrinsics[0, 0] *= resize_W
        intrinsics[1, 1] *= resize_H
        intrinsics[0, 2] *= resize_W
        intrinsics[1, 2] *= resize_H

        # metric depth -> point cloud
        pcd = depth2pcd(
            metric_depth,
            intrinsics,
            color=cv2.cvtColor(resize_image, cv2.COLOR_BGR2RGB),
            input_mask=mask,
            ret_pcd=True,
        )
        if apply_filter:
            _, inlier_idx = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
            pcd = pcd.select_by_index(inlier_idx)

        # save pcd to temporary .ply
        ply_path = _new_temp_path('.ply')
        o3d.io.write_point_cloud(ply_path, pcd)

        # Same points exported as .glb for the Model3D viewer.
        vertices = np.asarray(pcd.points)
        vertex_colors = (np.asarray(pcd.colors) * 255).astype(np.uint8)
        mesh = trimesh.PointCloud(vertices=vertices, colors=vertex_colors)
        glb_path = _new_temp_path('.glb')
        mesh.export(glb_path)

        # save raw depth (npy), resized back to the input resolution
        depth = cv2.resize(ppd_depth, (W, H), interpolation=cv2.INTER_LINEAR)
        raw_depth_path = _new_temp_path('.npy')
        np.save(raw_depth_path, depth)

        # Min-max normalize to 0..255 (epsilon avoids division by zero on a
        # constant depth map), then colorize.
        depth_vis = (depth - depth.min()) / (depth.max() - depth.min() + 1e-5) * 255.0
        depth_vis = depth_vis.astype(np.uint8)
        colored_depth = (cmap(depth_vis)[:, :, :3] * 255).astype(np.uint8)

        # Side-by-side image | white gap | depth, channel-reversed for cv2.imwrite.
        split_region = np.ones((image.shape[0], 50, 3), dtype=np.uint8) * 255
        combined_result = cv2.hconcat([image[:, :, ::-1], split_region, colored_depth[:, :, ::-1]])
        concat_path = _new_temp_path('.png')
        cv2.imwrite(concat_path, combined_result)

        return [(image, colored_depth), glb_path, concat_path, raw_depth_path, ply_path]

    with gr.Blocks(css=css) as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown("### Depth Prediction demo")

        with gr.Row():
            # Left: input image + settings
            with gr.Column():
                input_image = gr.Image(label="Input Image", image_mode="RGB", type='numpy', elem_id='img-display-input')
                with gr.Accordion(label="Settings", open=False):
                    denoise_steps = gr.Slider(label="Denoising Steps", minimum=1, maximum=50, value=default_steps, step=1)
                    apply_filter = gr.Checkbox(label="Apply filter points", value=True)
                submit_btn = gr.Button(value="Predict Depth")

            # Right: 3D point cloud + depth
            with gr.Column():
                with gr.Tabs():
                    with gr.Tab("3D View"):
                        model_3d = gr.Model3D(display_mode="solid", label="3D Point Map", clear_color=[1, 1, 1, 1], height="60vh")
                    with gr.Tab("Depth"):
                        depth_map = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)

                concat_file = gr.File(label="Concatenated visualization (image+depth)", elem_id="image-depth-download")
                raw_depth_file = gr.File(label="Raw depth output (saved as .npy)", elem_id="download")
                pcd_file = gr.File(label="Point Cloud (.ply)", elem_id="download-ply")

        result_components = [depth_map, model_3d, concat_file, raw_depth_file, pcd_file]

        submit_btn.click(
            on_submit,
            inputs=[input_image, denoise_steps, apply_filter],
            outputs=result_components
        )

        # Only offer examples when the directory exists and is non-empty, so
        # the demo still starts without the bundled assets.
        example_files = []
        if os.path.isdir(EXAMPLES_DIR):
            example_files = [
                os.path.join(EXAMPLES_DIR, filename)
                for filename in sorted(os.listdir(EXAMPLES_DIR))
            ]
        if example_files:
            examples = gr.Examples(
                examples=example_files,
                inputs=[input_image],
                outputs=result_components,
                fn=on_submit
            )

    demo.queue().launch(share=share)


if __name__ == '__main__':
    main(share=True)