"""Headless point-cloud rasterizer with orbiting cameras and video assembly.

This renders colored point clouds entirely with PyTorch tensor ops (a
super-sampled painter's algorithm with a proper z-buffer), so it works on any
machine with a GPU and needs no OpenGL/EGL/Filament. The same primitive renders
every modality (RGB, depth, normals, canonical, tracks) — only the per-point
colors change — which is what makes the 180-degree "grand tour" morph possible.
"""

from __future__ import annotations

import math

import numpy as np
import torch


# --------------------------------------------------------------------------- #
# Cameras
# --------------------------------------------------------------------------- #
def look_at(eye, center, up):
    """OpenCV world-to-camera ``R (3,3), t (3,)`` looking from ``eye`` at ``center``.

    The rows of ``R`` are the camera axes expressed in world coordinates:
    ``x = z × up`` (right), ``y = z × x`` (down) and ``z`` (forward), and
    ``t = -R @ eye``.

    If the viewing direction is (nearly) parallel to ``up``, ``up`` is nudged
    by ``1e-3`` along the world X axis; should that still be degenerate (the
    viewing direction itself lies along X), the Y and then Z axes are tried.

    ``eye == center`` has no viewing direction at all and yields a non-finite
    rotation.

    Returns:
        ``(R, t)`` as float32 arrays.
    """
    eye = np.asarray(eye, np.float64)
    center = np.asarray(center, np.float64)
    up = np.asarray(up, np.float64)

    z = center - eye
    z /= np.linalg.norm(z) + 1e-12
    x = np.cross(z, up)
    nx = np.linalg.norm(x)
    if nx < 1e-8:  # forward parallel to up; nudge
        # X is tried first; the other axes only matter when forward is itself
        # along X, where an X nudge leaves the cross product at zero.
        for axis in np.eye(3):
            x = np.cross(z, up + 1e-3 * axis)
            nx = np.linalg.norm(x)
            if nx >= 1e-8:
                break
    x /= nx
    y = np.cross(z, x)
    R = np.stack([x, y, z], axis=0)
    t = -R @ eye
    return R.astype(np.float32), t.astype(np.float32)


def render_intrinsics(size: int, fov_deg: float = 38.0) -> np.ndarray:
    """Square pinhole intrinsics for a ``size x size`` render.

    The focal length is derived from the field of view ``fov_deg`` and the
    principal point sits at ``(size / 2, size / 2)``. Returns a float32 3x3.
    """
    f = (size * 0.5) / math.tan(math.radians(fov_deg) * 0.5)
    return np.array([[f, 0, size * 0.5],
                     [0, f, size * 0.5],
                     [0, 0, 1.0]], dtype=np.float32)


def scene_bounds(points: np.ndarray):
    """Bounding-box center and per-axis half-extents of a point set.

    Uses robust (0.5/99.5 percentile) bounds so a few flying pixels don't
    inflate the framing, while still including the full head (which is far
    from those percentiles). Rows containing non-finite values are ignored.

    Input that is not 2-D (an empty list, an ``(H, W, 3)`` point map, ...) is
    flattened to ``(-1, 3)`` first. When no finite point remains the result is
    a zero center with unit half-extents.

    Returns ``(center (3,), half_extents (3,))`` as float32; half-extents are
    floored at ``1e-4``.
    """
    pts = np.asarray(points, np.float32)
    if pts.ndim != 2:
        pts = pts.reshape(-1, 3)
    pts = pts[np.isfinite(pts).all(axis=1)]
    if pts.shape[0] == 0:
        return np.zeros(3, np.float32), np.ones(3, np.float32)
    lo = np.percentile(pts, 0.5, axis=0)
    hi = np.percentile(pts, 99.5, axis=0)
    center = (lo + hi) * 0.5
    half_extents = np.maximum((hi - lo) * 0.5, 1e-4)
    return center.astype(np.float32), half_extents.astype(np.float32)


def orbit_camera(center, scale, azimuth_deg, elevation_deg=0.0, dist_factor=2.4,
                 up=(0.0, -1.0, 0.0), dist=None):
    """Camera ``(R, t)`` orbiting the face from *outside*.

    Azimuth 0 reproduces the original frontal viewpoint (the face front faces
    the -Z direction in the unprojected OpenCV cloud, so the camera sits on the
    -Z side and looks toward +Z). Positive azimuth swings the camera around the
    vertical axis. ``up=(0,-1,0)`` keeps faces upright (+Y is down in OpenCV).
    ``dist`` overrides the camera distance (else ``scale * dist_factor``).
    """
    az = math.radians(azimuth_deg)
    el = math.radians(elevation_deg)
    if dist is None:
        dist = scale * dist_factor
    ex = -dist * math.cos(el) * math.sin(az)
    ez = -dist * math.cos(el) * math.cos(az)
    ey = -dist * math.sin(el)  # up is -Y, so look slightly from above
    eye = np.asarray(center, np.float64) + np.array([ex, ey, ez], np.float64)
    return look_at(eye, center, up)


def sway_azimuths(n: int, amplitude: float = 80.0, cycles: float = 1.0) -> np.ndarray:
    """Orbit schedule (deg): 0 -> -amp -> 0 -> +amp -> 0.

    I.e. swing left, back to center, right, and back — repeated ``cycles``
    times (rounded to an integer, at least once). Mirrors the keyframed yaw
    cycle used by the repo's ``render_*.py`` scripts.

    Returns ``max(n, 1)`` azimuths linearly interpolated between the keyframes.
    """
    key = [0.0]
    for _ in range(max(1, int(round(cycles)))):
        key += [-amplitude, 0.0, amplitude, 0.0]
    key = np.array(key, np.float64)
    key_pos = np.linspace(0, 1, len(key))
    return np.interp(np.linspace(0, 1, max(n, 1)), key_pos, key)


def linspace_azimuths(n: int, start: float, stop: float) -> np.ndarray:
    """Return ``max(n, 1)`` azimuths evenly spaced from ``start`` to ``stop``."""
    return np.linspace(start, stop, max(n, 1))


# --------------------------------------------------------------------------- #
# Rasterizer
# --------------------------------------------------------------------------- #
@torch.no_grad()
def rasterize(points, colors, R, t, K, size: int, radius: int = 2,
              bg=(255, 255, 255), supersample: int = 2, device="cuda",
              return_mask: bool = False):
    """Render a colored point cloud to a ``size x size`` uint8 RGB image.

    Uses a super-sampled z-buffer (nearest point wins per pixel) and
    average-pool down-sampling for anti-aliasing.

    Args:
        points: world-space points, one row per point.
        colors: per-point colors, one row per point; the output is clamped to
            ``[0, 255]`` before the uint8 conversion.
        R, t: world-to-camera rotation and translation (``Xc = P @ R.T + t``).
        K: 3x3 pinhole intrinsics for the ``size x size`` image.
        size: output side length in pixels.
        radius: splat radius in output pixels (scaled by ``supersample``).
        bg: background color.
        supersample: internal resolution multiplier; values below 1 are
            treated as 1.
        device: torch device to use; the CPU is used whenever CUDA is not
            available.
        return_mask: also return a boolean ``(size, size)`` coverage mask.

    Returns:
        The image, or ``(image, mask)`` when ``return_mask`` is true.
    """
    dev = torch.device(device if torch.cuda.is_available() else "cpu")
    P = torch.as_tensor(np.ascontiguousarray(points), dtype=torch.float32, device=dev)
    C = torch.as_tensor(np.ascontiguousarray(colors), dtype=torch.float32, device=dev)
    if P.numel() == 0:
        img = np.tile(np.array(bg, np.uint8), (size, size, 1))
        return (img, np.zeros((size, size), bool)) if return_mask else img

    Rt = torch.as_tensor(np.asarray(R), dtype=torch.float32, device=dev)
    tt = torch.as_tensor(np.asarray(t), dtype=torch.float32, device=dev)
    Xc = P @ Rt.T + tt
    z = Xc[:, 2]

    # A non-positive factor would give an empty buffer and break the pooling.
    ss = max(1, int(supersample))
    Hs = Ws = size * ss
    # clone(): as_tensor may share memory with the caller's K, which must not
    # be scaled in place.
    Ks = torch.as_tensor(np.asarray(K), dtype=torch.float32, device=dev).clone()
    Ks[:2, :] *= ss
    proj = Xc @ Ks.T
    inv = 1.0 / proj[:, 2].clamp(min=1e-6)
    ui = (proj[:, 0] * inv).round().long()
    vi = (proj[:, 1] * inv).round().long()

    # Points at or behind the camera plane never contribute; drop them once
    # instead of re-testing them for every splat offset.
    front = z > 1e-4
    ui, vi, z, C = ui[front], vi[front], z[front], C[front]

    # Disc-shaped splat footprint in super-sampled pixels.
    rr = max(1, int(round(radius * ss)))
    offs = [(du, dv)
            for du in range(-rr, rr + 1)
            for dv in range(-rr, rr + 1)
            if du * du + dv * dv <= rr * rr]

    flat_l, z_l, c_l = [], [], []
    for du, dv in offs:
        uu = ui + du
        vv = vi + dv
        m = (uu >= 0) & (uu < Ws) & (vv >= 0) & (vv < Hs)
        flat_l.append((vv * Ws + uu)[m])
        z_l.append(z[m])
        c_l.append(C[m])
    flat = torch.cat(flat_l)
    zc = torch.cat(z_l)
    cc = torch.cat(c_l)

    # Per-pixel minimum depth, then keep the fragments that match it.
    npix = Hs * Ws
    zbuf = torch.full((npix,), float("inf"), device=dev)
    zbuf.scatter_reduce_(0, flat, zc, reduce="amin", include_self=True)
    winner = zc <= zbuf[flat] + 1e-6

    img = torch.empty((npix, 3), dtype=torch.float32, device=dev)
    img[:] = torch.tensor(bg, dtype=torch.float32, device=dev)
    img[flat[winner]] = cc[winner]
    cov = torch.zeros((npix,), dtype=torch.float32, device=dev)
    cov[flat[winner]] = 1.0

    # Average-pool each ss x ss cell down to one output pixel.
    img = img.reshape(size, ss, size, ss, 3).mean(dim=(1, 3))
    out = img.clamp(0, 255).to(torch.uint8).cpu().numpy()
    if return_mask:
        cov = cov.reshape(size, ss, size, ss).mean(dim=(1, 3))
        return out, (cov.cpu().numpy() > 0.0)
    return out


def _flood_white(img, bg=(255, 255, 255), tol=14):
    """Replace near-background pixels (matching the top-left corner) with ``bg``.

    A pixel counts as background when every channel is within ``tol`` of the
    top-left pixel. Returns a modified copy; ``img`` is left untouched.
    """
    corner = img[0, 0].astype(np.int16)
    diff = np.abs(img.astype(np.int16) - corner).max(axis=-1)
    out = img.copy()
    out[diff <= tol] = np.array(bg, np.uint8)
    return out


def srgb_to_linear(colors):
    """sRGB colors (0-1 or 0-255) -> linear (matches render_pred_output.py).

    Input whose maximum exceeds 1.0 is treated as 0-255 and divided by 255
    first. Returns a float32 array of the same shape.
    """
    c = np.asarray(colors, np.float32)
    if c.size and c.max() > 1.0:
        c = c / 255.0
    below = c <= 0.04045
    lin = np.empty_like(c)
    lin[below] = c[below] / 12.92
    lin[~below] = ((c[~below] + 0.055) / 1.055) ** 2.4
    return lin


def _orbit_extrinsic(center, azimuth_deg):
    """World-to-camera (OpenCV) for a camera orbiting ``center``.

    The camera orbits about the vertical (world Y) axis, starting from the
    *input* camera (identity at azimuth 0). Rendering with the input intrinsics
    and this extrinsic therefore overlaps the input image at azimuth 0 and
    swings around the face otherwise.

    Returns a 4x4 float64 matrix.
    """
    az = math.radians(azimuth_deg)
    c = np.asarray(center, np.float64).reshape(3)
    ca, sa = math.cos(az), math.sin(az)
    Ry = np.array([[ca, 0.0, sa],
                   [0.0, 1.0, 0.0],
                   [-sa, 0.0, ca]], np.float64)
    # Camera-to-world: rotate about the axis through ``center``.
    c2w = np.eye(4)
    c2w[:3, :3] = Ry
    c2w[:3, 3] = c - Ry @ c
    return np.linalg.inv(c2w)


def side_by_side(left, right):
    """Horizontally stack two RGB frames (original | prediction).

    The shorter frame is resized (aspect preserved) to the taller one's height
    and the last column is dropped if the combined width is odd.
    """
    import cv2

    h = max(left.shape[0], right.shape[0])

    def fit(img):
        if img.shape[0] != h:
            w = int(round(img.shape[1] * h / img.shape[0]))
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
        return img

    out = np.concatenate([fit(left), fit(right)], axis=1)
    if out.shape[1] % 2:  # even width for video codecs
        out = out[:, :-1]
    return np.ascontiguousarray(out)


class Renderer:
    """Colored point-cloud renderer that views the cloud through the *input*
    camera and orbits around the face.

    It uses the input intrinsics (so azimuth 0 reproduces — and overlaps — the
    input image) on a per-clip canvas matching the input aspect ratio.
    Rendering style mirrors the repo's ``render_pred_output.py``: Open3D
    OffscreenRenderer at a 2x internal resolution (down-sampled for
    anti-aliasing), ``defaultUnlit``, ``point_size=8``, sRGB->linear vertex
    colors, white background. Falls back to a square torch rasterization if
    headless GL is unavailable.
    """

    def __init__(self, out_h, out_w, intrinsics, input_hw, supersample=2,
                 point_size=0.0, device="cuda", bg=(255, 255, 255), backend="auto"):
        """Set up the canvas, the scaled intrinsics and the rendering backend.

        Args:
            out_h, out_w: output frame size in pixels.
            intrinsics: 3x3 intrinsics of the input camera.
            input_hw: ``(height, width)`` of the input image; only the height
                is used to scale the intrinsics to the internal resolution.
            supersample: internal resolution multiplier (at least 1).
            point_size: Open3D point size; a falsy value selects 8.0.
            device: torch device for the fallback rasterizer.
            bg: background color (0-255 RGB).
            backend: ``"auto"`` tries Open3D and silently falls back to torch,
                ``"open3d"`` re-raises if Open3D cannot be set up, any other
                value uses the torch rasterizer directly.
        """
        self.out_h, self.out_w = int(out_h), int(out_w)
        self.ss = max(1, int(supersample))
        self.ih, self.iw = self.out_h * self.ss, self.out_w * self.ss
        s = self.ih / float(input_hw[0])  # input -> internal scale (aspect kept)
        K = np.asarray(intrinsics, np.float64).copy()
        K[:2, :] *= s
        self.K = K
        self.point_size = float(point_size) if point_size else 8.0
        self.bg = bg
        self.device = device
        self.radius_px = max(2, round(self.out_h / 170))
        self.backend = "torch"
        self._o3d = self._r = self._mat = None
        if backend in ("auto", "open3d"):
            try:
                import open3d as o3d

                r = o3d.visualization.rendering.OffscreenRenderer(self.iw, self.ih)
                r.scene.set_background([bg[0] / 255, bg[1] / 255, bg[2] / 255, 1.0])
                mat = o3d.visualization.rendering.MaterialRecord()
                mat.shader = "defaultUnlit"
                mat.point_size = self.point_size
                self._o3d, self._r, self._mat = o3d, r, mat
                self.backend = "open3d"
            except Exception:
                # Deliberately broad: any failure to import or initialise
                # Open3D selects the torch fallback unless it was explicitly
                # requested.
                if backend == "open3d":
                    raise
                self.backend = "torch"

    def render(self, points, colors, center, azimuth):
        """Render one ``(out_h, out_w, 3)`` frame at ``azimuth`` degrees.

        With the Open3D backend the cloud is viewed through the input camera
        orbited about ``center``. With the torch fallback a square synthetic
        orbit view is rasterized on the configured background and resized to
        the output canvas.
        """
        if self.backend == "open3d":
            import cv2

            o3d, r = self._o3d, self._r
            r.scene.clear_geometry()
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(
                np.ascontiguousarray(points, np.float64))
            pcd.colors = o3d.utility.Vector3dVector(
                srgb_to_linear(colors).astype(np.float64))
            r.scene.add_geometry("pc", pcd, self._mat)
            ext = _orbit_extrinsic(center, azimuth)
            r.setup_camera(self.K, ext, self.iw, self.ih)
            img = np.asarray(r.render_to_image())[..., :3]
            if self.ss > 1:
                img = cv2.resize(img, (self.out_w, self.out_h),
                                 interpolation=cv2.INTER_AREA)
            return _flood_white(img, self.bg)

        # torch fallback: square synthetic-orbit render, then resize (no exact overlap)
        import cv2

        pts = np.asarray(points, np.float64)
        c = np.asarray(center, np.float64)
        scale = float(np.percentile(np.linalg.norm(pts - c, axis=1), 90)) if len(pts) else 1.0
        R, t = orbit_camera(c, scale, azimuth, dist=scale * 2.6)
        K = render_intrinsics(self.out_h, 35.0)
        # bg is forwarded so both backends honour the configured background.
        img = rasterize(points, colors, R, t, K, self.out_h, radius=self.radius_px,
                        bg=self.bg, supersample=self.ss, device=self.device)
        return cv2.resize(img, (self.out_w, self.out_h), interpolation=cv2.INTER_AREA)


# --------------------------------------------------------------------------- #
# Video
# --------------------------------------------------------------------------- #
def write_video(frames, path, fps: int = 20):
    """Write HxWx3 uint8 frames to an mp4 (libx264, yuv420p).

    ``frames`` may be any iterable. The writer is closed even if a frame fails
    to encode. Returns ``path``.
    """
    import imageio.v2 as imageio

    writer = imageio.get_writer(path, fps=fps, codec="libx264", quality=8,
                                macro_block_size=8,
                                ffmpeg_params=["-pix_fmt", "yuv420p"])
    try:
        for f in frames:
            writer.append_data(np.ascontiguousarray(f))
    finally:
        writer.close()
    return path