# GenZoo Gradio demo (HuggingFace Space).
import os

# Must be set before pyrender is imported so PyOpenGL selects the headless
# EGL backend (no X display available on servers). Do not reorder below
# `import pyrender`.
if "PYOPENGL_PLATFORM" not in os.environ:
    os.environ["PYOPENGL_PLATFORM"] = "egl"

import math
import tempfile

import cv2
import gradio as gr
import numpy as np
import pyrender
import torch
import trimesh

from src.datasets.vitdet_dataset import ViTDetDataset
from src.models import load_hmr2
# Color of the mesh — RGB floats in [0, 1] (light blue).
LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)
class WeakPerspectiveCamera(pyrender.Camera):
    """Weak-perspective (scaled-orthographic) camera for pyrender.

    Projects with per-axis scale factors and a 2D translation instead of a
    perspective divide; used to overlay meshes predicted in crop
    coordinates onto the source image.
    """

    def __init__(
        self,
        scale,        # (sx, sy) screen-space scale factors
        translation,  # (tx, ty) screen-space translation
        znear=10.0,
        zfar=1000.0,
        name=None,
    ):
        super(WeakPerspectiveCamera, self).__init__(
            znear=znear,
            zfar=zfar,
            name=name,
        )
        self.scale = scale
        self.translation = translation

    def get_projection_matrix(self, width=None, height=None):
        """Return the 4x4 projection matrix.

        `width`/`height` are part of the pyrender.Camera interface but are
        unused: the projection depends only on scale/translation.
        """
        P = np.eye(4)
        P[0, 0] = self.scale[0]
        P[1, 1] = self.scale[1]
        P[0, 3] = self.translation[0] * self.scale[0]
        # y translation is negated — presumably because image y grows
        # downward while NDC y grows upward; confirm against callers.
        P[1, 3] = -self.translation[1] * self.scale[1]
        # NOTE(review): constant depth scale; depth appears to be used only
        # for z-ordering here, not metric depth — confirm -0.1 suits the
        # znear/zfar values passed in.
        P[2, 2] = -0.1
        return P
class Renderer:
    """Offscreen pyrender renderer for overlaying meshes on images.

    Builds a persistent scene with three directional lights; each call to
    render() temporarily adds a mesh node and a weak-perspective camera,
    renders one RGBA frame, then removes both nodes so the scene can be
    reused.
    """

    def __init__(self, faces, resolution=(1024, 1024), orig_img=False):
        """
        Args:
            faces: (F, 3) triangle index array shared by all rendered meshes.
            resolution: (width, height) of the offscreen viewport.
            orig_img: kept for interface compatibility; not used here.
        """
        self.resolution = resolution
        self.faces = faces
        self.orig_img = orig_img
        self.renderer = pyrender.OffscreenRenderer(
            viewport_width=self.resolution[0],
            viewport_height=self.resolution[1],
            point_size=1.0,
        )
        # Fully transparent background so the RGBA output can be
        # alpha-composited over the source image.
        self.scene = pyrender.Scene(
            bg_color=[0.0, 0.0, 0.0, 0.0], ambient_light=(0.3, 0.3, 0.3)
        )
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
        light_pose = np.eye(4)
        # Three lights around the subject (same positions as before).
        for position in ([0, -1, 1], [0, 1, 1], [1, 1, 2]):
            light_pose[:3, 3] = position
            self.scene.add(light, pose=light_pose)

    def render(self, verts, cam, color=LIGHT_BLUE, znear=1.0, zfar=10000.0):
        """Render vertices under a weak-perspective camera.

        Args:
            verts: (V, 3) vertex positions.
            cam: (sx, sy, tx, ty) weak-perspective camera parameters.
            color: RGB base color of the mesh material.
            znear, zfar: camera clipping planes.

        Returns:
            (rgb, depth) as returned by pyrender.OffscreenRenderer.render.
        """
        mesh = trimesh.Trimesh(vertices=verts, faces=self.faces, process=False)
        # Rotate 180 degrees about x — presumably converting between the
        # model's and pyrender's y/z-axis conventions.
        Rx = trimesh.transformations.rotation_matrix(math.radians(180), [1, 0, 0])
        mesh.apply_transform(Rx)
        sx, sy, tx, ty = cam
        camera = WeakPerspectiveCamera(
            scale=[sx, sy], translation=[tx, ty], znear=znear, zfar=zfar
        )
        # BUG FIX: the `color` argument was previously ignored — the
        # material hard-coded LIGHT_BLUE. The default keeps old behavior.
        material = pyrender.MetallicRoughnessMaterial(
            metallicFactor=0.0, alphaMode="OPAQUE", baseColorFactor=color
        )
        mesh = pyrender.Mesh.from_trimesh(mesh, material=material, smooth=True)
        mesh_node = self.scene.add(mesh, "mesh")
        cam_node = self.scene.add(camera, pose=np.eye(4))
        rgb, depth = self.renderer.render(self.scene, flags=pyrender.RenderFlags.RGBA)
        # Remove the per-call nodes so repeated renders don't accumulate.
        self.scene.remove_node(mesh_node)
        self.scene.remove_node(cam_node)
        return rgb, depth
def create_temp_obj(vertices, faces):
    """Export a vertex-colored mesh to a temporary .obj file.

    Args:
        vertices: (V, 3) vertex positions.
        faces: (F, 3) triangle indices.

    Returns:
        Path of the written .obj file.
    """
    mesh = trimesh.Trimesh(
        vertices=vertices,
        faces=faces,
        # One RGBA row per vertex, all in the shared mesh color.
        vertex_colors=np.tile(np.array(LIGHT_BLUE + (1.0,)), (len(vertices), 1)),
    )
    # BUG FIX: write to a unique temp file instead of a fixed
    # "out_mesh.obj" in the working directory — concurrent requests would
    # overwrite each other's output, and a read-only CWD would fail.
    fd, temp_path = tempfile.mkstemp(suffix=".obj")
    os.close(fd)  # trimesh reopens the path itself; avoid leaking the fd
    mesh.export(temp_path)
    return temp_path
def resize_and_pad(img):
    """Resize an image to fit 1024x1024 and center it on a black canvas.

    Aspect ratio is preserved; the unfilled border is zeros of the input's
    dtype and channel count.

    Args:
        img: HxW or HxWxC numpy image.

    Returns:
        1024x1024 (xC) numpy array with the same dtype as `img`.
    """
    original_type = img.dtype
    img_to_process = img.copy()
    h, w = img_to_process.shape[:2]
    target_size = 1024
    scale = min(target_size / w, target_size / h)
    # BUG FIX: clamp to at least 1 pixel — extreme aspect ratios (e.g. a
    # 1x100000 strip) previously truncated to a zero dimension, which
    # makes cv2.resize raise.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(img_to_process, (new_w, new_h), interpolation=cv2.INTER_AREA)
    if len(img.shape) == 3:
        canvas = np.zeros((target_size, target_size, img.shape[2]), dtype=original_type)
    else:
        canvas = np.zeros((target_size, target_size), dtype=original_type)
    # Center the resized image on the canvas.
    x_offset = (target_size - new_w) // 2
    y_offset = (target_size - new_h) // 2
    canvas[y_offset : y_offset + new_h, x_offset : x_offset + new_w] = resized
    return canvas
def process_image(input_image):
    """Run HMR2 on the edited image; return (overlay BGR image, .obj path).

    Args:
        input_image: gradio ImageEditor payload; only its "composite"
            image is used.
    """
    img = resize_and_pad(input_image["composite"])
    # NOTE(review): gradio typically delivers RGB; this BGR2RGB swap
    # (mirrored by RGB2BGR on the way out) may just be channel-order
    # bookkeeping for the model — confirm against ViTDetDataset.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    hmr2, hmr_2_cfg = load_hmr2()
    device = torch.device("cpu")
    hmr2 = hmr2.to(device)
    hmr2.eval()
    # One full-image bounding box; format presumably [x0, y0, x1, y1].
    bbox = [0, 0, img.shape[1], img.shape[0]]
    dataset = ViTDetDataset(hmr_2_cfg, img, np.array([bbox]))
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    batch = next(iter(dataloader))
    with torch.inference_mode():
        out = hmr2(batch)
    # Pose the SMPL body from the predicted parameters (pose2rot=False,
    # i.e. parameters are rotation matrices) and take batch item 0.
    pred_verts = hmr2.smpl(**{k: v.float() for k, v in out["pred_smpl_params"].items()}, pose2rot=False).vertices[0]
    scale, tx, ty = out["scale"], out["tx"], out["ty"]
    obj_verts = pred_verts.detach().cpu().numpy()
    # Mirror x and y for the exported mesh — presumably to match the 3D
    # viewer's orientation; confirm.
    obj_verts[:, 1] = -obj_verts[:, 1]
    obj_verts[:, 0] = -obj_verts[:, 0]
    obj_path = create_temp_obj(obj_verts, hmr2.smpl.faces)
    # NOTE(review): device is always "cpu" above, so this guard always
    # fires — looks like a leftover from a GPU/CPU dual code path.
    if str(device) == "cpu":
        pred_verts = pred_verts * torch.tensor([-1, -1, 1])[None]
    renderer = Renderer(hmr2.smpl.faces, resolution=(img.shape[1], img.shape[0]))
    factor = 2.0
    rendered, depth = renderer.render(
        pred_verts.detach().cpu().numpy(),
        (scale * factor, scale * factor, tx / scale, ty / scale),
    )
    # Alpha-composite the rendered RGBA mesh over the input image.
    rendered_float = rendered.astype(np.float32) / 255.0
    out_img_float = img.astype(np.float32) / 255.0
    mask = rendered_float[:, :, 3]
    mask = np.stack([mask] * 3, axis=-1)
    rendered_rgb = rendered_float[:, :, :3]
    mesh_overlay = out_img_float * (1 - mask) + rendered_rgb * mask
    mesh_overlay = (mesh_overlay * 255).astype(np.uint8)
    return cv2.cvtColor(mesh_overlay, cv2.COLOR_RGB2BGR), obj_path
# Gradio UI: one image in, mesh overlay + interactive 3D model out.
iface = gr.Interface(
    fn=process_image,
    analytics_enabled=False,
    inputs=gr.ImageEditor(
        sources=("upload", "clipboard"),
        brush=False,      # drawing tools disabled — only cropping is needed
        eraser=False,
        crop_size="1:1",  # enforce the square crop the pipeline expects
        layers=False,
        placeholder="Upload an image or select from the examples.",
    ),
    outputs=[
        gr.Image(label="Mesh overlay"),
        gr.Model3D(
            clear_color=[0.0, 0.0, 0.0, 0.0],
            label="3D Model",
            display_mode="point_cloud",
        ),
    ],
    title="GenZoo",
    description="""
# Generative Zoo
https://genzoo.is.tue.mpg.de
## Usage
1. **Input**: Select an example image or upload your own.
2. **Processing**: Crop the image to a square.
3. **Output**:
- 2D mesh overlay on the original image
- Interactive 3D model visualization
The demo is provided for non-commercial purposes, and its use is governed by the [LICENSE](https://genzoo.is.tue.mpg.de/license.html). \n
We thank the authors of [Humans in 4D: Reconstructing and Tracking Humans with Transformers](https://shubham-goel.github.io/4dhumans/) from which we borrowed components.
""",
    examples=[
        "gradio_example_images/000014.png",
        "gradio_example_images/000018.png",
        "gradio_example_images/000247.png",
        "gradio_example_images/000315.png",
        "gradio_example_images/001114.png",
    ],
)

iface.launch(
)