File size: 5,814 Bytes
0343ccd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import os
import sys
from pathlib import Path

# Make the repository root importable when this file is run as a script
# (two directories above this file — assumes scripts/<subdir>/<this file> layout).
if (_package_root := str(Path(__file__).absolute().parents[2])) not in sys.path:
    sys.path.insert(0, _package_root)

import time
import uuid
import tempfile
from typing import *
import atexit
from concurrent.futures import ThreadPoolExecutor

import click
@click.command(help='Web demo')
@click.option('--share', is_flag=True, help='Whether to run the app in shared mode.')
@click.option('--max_size', default=800, type=int, help='The maximum size of the input image.')
@click.option('--pretrained', 'pretrained_model_name_or_path', default='Ruicheng/moge-vitl', help='The name or path of the pre-trained model.')
def main(share: bool, max_size: int, pretrained_model_name_or_path: str):
    """Launch the MoGe Gradio web demo.

    Args:
        share: Run the Gradio app with a public share link.
        max_size: Maximum input image dimension; larger images are downsampled.
        pretrained_model_name_or_path: Name or path of the pre-trained MoGe model.
    """
    # Lazy import: heavy dependencies are loaded only when the command actually runs.
    import cv2
    import torch
    import numpy as np
    import trimesh
    import trimesh.visual
    from PIL import Image
    import gradio as gr
    try:
        import spaces   # This is for deployment at huggingface.co/spaces
        HUGGINGFACE_SPACES_INSTALLED = True
    except ImportError:
        HUGGINGFACE_SPACES_INSTALLED = False
    import utils3d
    from moge.utils.vis import colorize_depth
    from moge.model.v1 import MoGeModel

    # NOTE(review): assumes a CUDA device is available — the demo hard-crashes on CPU-only hosts.
    model = MoGeModel.from_pretrained(pretrained_model_name_or_path).cuda().eval()
    thread_pool_executor = ThreadPoolExecutor(max_workers=1)

    def delete_later(path: Union[str, os.PathLike], delay: int = 300):
        """Schedule best-effort deletion of `path` after `delay` seconds, and at interpreter exit."""
        def _delete():
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: the file may already have been removed.
                pass

        def _wait_and_delete():
            time.sleep(delay)
            _delete()   # BUGFIX: was `_delete(path)` — `_delete` takes no arguments (TypeError in worker thread)

        thread_pool_executor.submit(_wait_and_delete)
        atexit.register(_delete)

    # Inference on GPU. `spaces.GPU` requests a GPU slice when deployed on HF Spaces;
    # elsewhere the identity decorator leaves the function untouched.
    @(spaces.GPU if HUGGINGFACE_SPACES_INSTALLED else lambda x: x)
    def run_with_gpu(image: np.ndarray) -> Dict[str, np.ndarray]:
        """Run MoGe inference on an HxWx3 uint8 RGB image; returns numpy outputs on CPU."""
        image_tensor = torch.tensor(image, dtype=torch.float32, device=torch.device('cuda')).permute(2, 0, 1) / 255
        output = model.infer(image_tensor, apply_mask=True, resolution_level=9)
        output = {k: v.cpu().numpy() for k, v in output.items()}
        return output

    # Full inference pipeline: resize -> infer -> mesh -> export GLB/PLY + colorized depth.
    def run(image: np.ndarray, remove_edge: bool = True):
        """Process one input image and return (colorized depth, glb path, ply path, FOV text).

        Args:
            image: HxWx3 uint8 RGB image from the Gradio input.
            remove_edge: If True, drop mesh faces straddling depth/normal discontinuities.
        """
        run_id = str(uuid.uuid4())

        # Downsample oversized inputs to keep inference cheap.
        larger_size = max(image.shape[:2])
        if larger_size > max_size:
            scale = max_size / larger_size
            image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        height, width = image.shape[:2]

        output = run_with_gpu(image)
        points, depth, mask = output['points'], output['depth'], output['mask']
        normals, normals_mask = utils3d.numpy.points_to_normals(points, mask=mask)
        fov_x, fov_y = utils3d.numpy.intrinsics_to_fov(output['intrinsics'])
        fov_x, fov_y = np.rad2deg([fov_x, fov_y])

        # BUGFIX: honor the "Remove edges" checkbox — `remove_edge` was previously ignored
        # and edge removal was applied unconditionally.
        if remove_edge:
            mesh_mask = mask & ~(
                utils3d.numpy.depth_edge(depth, rtol=0.03, mask=mask)
                & utils3d.numpy.normals_edge(normals, tol=5, mask=normals_mask)
            )
        else:
            mesh_mask = mask

        faces, vertices, vertex_colors, vertex_uvs = utils3d.numpy.image_mesh(
            points,
            image.astype(np.float32) / 255,
            utils3d.numpy.image_uv(width=width, height=height),
            mask=mesh_mask,
            tri=True
        )
        # Flip to the viewer's coordinate convention and invert the UV v-axis.
        vertices, vertex_uvs = vertices * [1, -1, -1], vertex_uvs * [1, -1] + [0, 1]

        tempdir = Path(tempfile.gettempdir(), 'moge')
        tempdir.mkdir(parents=True, exist_ok=True)

        # Textured GLB for the in-browser 3D viewer.
        output_glb_path = Path(tempdir, f'{run_id}.glb')
        trimesh.Trimesh(
            vertices=vertices * [-1, 1, -1],    # No idea why Gradio 3D Viewer's default camera is flipped
            faces=faces,
            visual=trimesh.visual.texture.TextureVisuals(
                uv=vertex_uvs,
                material=trimesh.visual.material.PBRMaterial(
                    baseColorTexture=Image.fromarray(image),
                    metallicFactor=0.5,
                    roughnessFactor=1.0
                )
            ),
            process=False
        ).export(output_glb_path)

        # Vertex-colored PLY for download.
        output_ply_path = Path(tempdir, f'{run_id}.ply')
        trimesh.Trimesh(
            vertices=vertices,
            faces=faces,
            vertex_colors=vertex_colors,
            process=False
        ).export(output_ply_path)

        colorized_depth = colorize_depth(depth)

        # Temporary artifacts are garbage-collected after 5 minutes.
        delete_later(output_glb_path, delay=300)
        delete_later(output_ply_path, delay=300)

        return (
            colorized_depth,
            output_glb_path,
            output_ply_path.as_posix(),
            f'Horizontal FOV: {fov_x:.2f}, Vertical FOV: {fov_y:.2f}'
        )

    gr.Interface(
        fn=run,
        inputs=[
            gr.Image(type="numpy", image_mode="RGB"),
            gr.Checkbox(True, label="Remove edges"),
        ],
        outputs=[
            gr.Image(type="numpy", label="Depth map (colorized)", format='png'),
            gr.Model3D(display_mode="solid", clear_color=[1.0, 1.0, 1.0, 1.0], label="3D Viewer"),
            gr.File(type="filepath", label="Download the model as .ply file"),
            gr.Textbox('--', label="FOV (Horizontal, Vertical)")
        ],
        title=None,
        description=f"""
## Turn a 2D image into a 3D point map with [MoGe](https://wangrc.site/MoGePage/)

NOTE:
* The maximum size is set to {max_size:d}px for efficiency purpose. Oversized images will be downsampled.
* The color in the 3D viewer may look dark due to rendering of 3D viewer. You may download the 3D model as .glb or .ply file to view it in other 3D viewers.
""",
        clear_btn=None,
        allow_flagging="never",
        theme=gr.themes.Soft()
    ).launch(share=share)
# Script entry point: invoke the click command (click parses sys.argv itself).
if __name__ == '__main__':
    main()