xinjie.wang
update
ddc47cd
# Project EmbodiedGen
#
# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
import sys
from collections import defaultdict
import numpy as np
import spaces
import torch
from tqdm import tqdm
current_file_path = os.path.abspath(__file__)
current_dir = os.path.dirname(current_file_path)
sys.path.append(os.path.join(current_dir, "../.."))
from thirdparty.TRELLIS.trellis.renderers import GaussianRenderer, MeshRenderer
from thirdparty.TRELLIS.trellis.utils.render_utils import (
yaw_pitch_r_fov_to_extrinsics_intrinsics,
)
__all__ = [
"render_video",
]
@spaces.GPU
def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
renderer = MeshRenderer()
renderer.rendering_options.resolution = options.get("resolution", 512)
renderer.rendering_options.near = options.get("near", 1)
renderer.rendering_options.far = options.get("far", 100)
renderer.rendering_options.ssaa = options.get("ssaa", 4)
rets = {}
for extr, intr in tqdm(zip(extrinsics, intrinsics), desc="Rendering"):
res = renderer.render(sample, extr, intr)
if "normal" not in rets:
rets["normal"] = []
normal = torch.lerp(
torch.zeros_like(res["normal"]), res["normal"], res["mask"]
)
normal = np.clip(
normal.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
).astype(np.uint8)
rets["normal"].append(normal)
return rets
@spaces.GPU
def render_gs_frames(
sample,
extrinsics,
intrinsics,
options=None,
colors_overwrite=None,
verbose=True,
**kwargs,
):
def to_img(tensor):
return np.clip(
tensor.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
).astype(np.uint8)
def to_numpy(tensor):
return tensor.detach().cpu().numpy()
renderer = GaussianRenderer()
renderer.pipe.kernel_size = kwargs.get("kernel_size", 0.1)
renderer.pipe.use_mip_gaussian = True
defaults = {
"resolution": 512,
"near": 0.8,
"far": 1.6,
"bg_color": (0, 0, 0),
"ssaa": 1,
}
final_options = {**defaults, **(options or {})}
for k, v in final_options.items():
if hasattr(renderer.rendering_options, k):
setattr(renderer.rendering_options, k, v)
outputs = defaultdict(list)
iterator = zip(extrinsics, intrinsics)
if verbose:
iterator = tqdm(iterator, total=len(extrinsics), desc="Rendering")
for extr, intr in iterator:
res = renderer.render(
sample, extr, intr, colors_overwrite=colors_overwrite
)
outputs["color"].append(to_img(res["color"]))
depth = res.get("percent_depth") or res.get("depth")
outputs["depth"].append(to_numpy(depth) if depth is not None else None)
return dict(outputs)
@spaces.GPU
def render_video(
sample,
resolution=512,
bg_color=(0, 0, 0),
num_frames=300,
r=2,
fov=40,
**kwargs,
):
yaws = torch.linspace(0, 2 * 3.1415, num_frames)
yaws = yaws.tolist()
pitch = [0.5] * num_frames
extrinsics, intrinsics = yaw_pitch_r_fov_to_extrinsics_intrinsics(
yaws, pitch, r, fov
)
render_fn = (
render_mesh_frames
if sample.__class__.__name__ == "MeshExtractResult"
else render_gs_frames
)
result = render_fn(
sample,
extrinsics,
intrinsics,
{"resolution": resolution, "bg_color": bg_color},
**kwargs,
)
return result