Umut Kocasari
Add FaceAnything Gradio demo app
4db294e
Raw
History Blame Contribute Delete
12.9 kB
"""Headless point-cloud rasterizer with orbiting cameras and video assembly.
This renders colored point clouds entirely with PyTorch tensor ops (a
super-sampled painter's algorithm with a proper z-buffer), so it works on any
machine with a GPU and needs no OpenGL/EGL/Filament. The same primitive renders
every modality (RGB, depth, normals, canonical, tracks) — only the per-point
colors change — which is what makes the 180-degree "grand tour" morph possible.
"""
from __future__ import annotations
import math
import numpy as np
import torch
# --------------------------------------------------------------------------- #
# Cameras
# --------------------------------------------------------------------------- #
def look_at(eye, center, up):
    """OpenCV world-to-camera ``R (3,3), t (3,)`` looking from ``eye`` at ``center``.

    The rows of ``R`` are the camera axes expressed in world coordinates:
    ``x = forward x up``, ``y = forward x x`` (pointing away from ``up``) and
    ``z = forward``. The translation is ``t = -R @ eye``.

    Degenerate inputs yield a valid rotation instead of NaNs:

    * ``eye`` coincides with ``center``: the camera looks along world +Z.
    * ``up`` is parallel to the viewing direction: ``up`` is nudged along +X;
      if that is still parallel (the view is along X), the world axis least
      aligned with the viewing direction is used as ``up`` instead.

    Args:
        eye: Camera position in world coordinates (3 values).
        center: World point the camera looks at (3 values).
        up: Approximate world "up" direction (3 values).

    Returns:
        ``(R, t)`` as float32 arrays of shape ``(3, 3)`` and ``(3,)``.
    """
    eye = np.asarray(eye, np.float64)
    center = np.asarray(center, np.float64)
    up = np.asarray(up, np.float64)

    forward = center - eye
    length = np.linalg.norm(forward)
    if length < 1e-12:
        # No viewing direction at all; pick a deterministic default.
        forward = np.array([0.0, 0.0, 1.0])
    else:
        forward = forward / (length + 1e-12)

    right = np.cross(forward, up)
    right_len = np.linalg.norm(right)
    if right_len < 1e-8:  # forward parallel to up; nudge
        right = np.cross(forward, up + np.array([1e-3, 0.0, 0.0]))
        right_len = np.linalg.norm(right)
    if right_len < 1e-12:
        # The +X nudge cannot help when forward itself lies along X, so fall
        # back to the world axis that is least aligned with forward.
        axis = np.zeros(3, np.float64)
        axis[int(np.argmin(np.abs(forward)))] = 1.0
        right = np.cross(forward, axis)
        right_len = np.linalg.norm(right)
    right = right / right_len

    down = np.cross(forward, right)
    R = np.stack([right, down, forward], axis=0)
    t = -R @ eye
    return R.astype(np.float32), t.astype(np.float32)
def render_intrinsics(size: int, fov_deg: float = 38.0) -> np.ndarray:
    """Build the 3x3 pinhole matrix for a square ``size x size`` render.

    The principal point sits at the image center and both focal lengths are
    derived from the full field of view ``fov_deg`` (degrees).

    Returns:
        A float32 ``(3, 3)`` intrinsics matrix.
    """
    half = size * 0.5
    focal = half / math.tan(math.radians(fov_deg) * 0.5)
    K = np.zeros((3, 3), dtype=np.float32)
    K[0, 0] = K[1, 1] = focal
    K[0, 2] = K[1, 2] = half
    K[2, 2] = 1.0
    return K
def scene_bounds(points: np.ndarray):
    """Bounding-box center and per-axis half-extents of a point set.

    Uses robust (0.5/99.5 percentile) bounds so a few flying pixels don't inflate
    the framing, while still including the full head (which is far from those
    percentiles).

    Args:
        points: Point coordinates. Any array-like whose last axis holds the
            coordinates is accepted (e.g. ``(N, 3)`` or an ``(H, W, 3)`` point
            map); leading axes are flattened. Rows containing a non-finite
            value are ignored.

    Returns:
        ``(center, half_extents)`` as float32 arrays. Half-extents are at least
        ``1e-4`` per axis. When there is no finite point (including empty
        input) the result is ``(zeros(3), ones(3))``.
    """
    pts = np.asarray(points, np.float32)
    if pts.size == 0:
        return np.zeros(3, np.float32), np.ones(3, np.float32)
    # Flatten to rows so point maps and a single (D,) point are handled like (N, D).
    pts = pts.reshape(-1, pts.shape[-1])
    pts = pts[np.isfinite(pts).all(axis=1)]
    if pts.shape[0] == 0:
        return np.zeros(3, np.float32), np.ones(3, np.float32)
    lo, hi = np.percentile(pts, [0.5, 99.5], axis=0)
    center = (lo + hi) * 0.5
    half_extents = np.maximum((hi - lo) * 0.5, 1e-4)
    return center.astype(np.float32), half_extents.astype(np.float32)
def orbit_camera(center, scale, azimuth_deg, elevation_deg=0.0,
                 dist_factor=2.4, up=(0.0, -1.0, 0.0), dist=None):
    """World-to-camera ``(R, t)`` for a camera circling ``center`` from outside.

    At azimuth 0 and elevation 0 the eye is placed at ``center - dist * Z`` and
    looks toward +Z. Increasing the azimuth moves the eye around the Y axis;
    increasing the elevation moves it toward -Y (which is "up" for the default
    ``up=(0, -1, 0)``).

    Args:
        center: Orbit pivot / look-at target (3 values).
        scale: Scene size; used with ``dist_factor`` when ``dist`` is not given.
        azimuth_deg: Rotation about the Y axis, in degrees.
        elevation_deg: Elevation angle, in degrees.
        dist_factor: Camera distance as a multiple of ``scale``.
        up: Up vector forwarded to ``look_at``.
        dist: Explicit camera distance; overrides ``scale * dist_factor``.

    Returns:
        The ``(R, t)`` pair produced by ``look_at``.
    """
    azimuth = math.radians(azimuth_deg)
    elevation = math.radians(elevation_deg)
    radius = scale * dist_factor if dist is None else dist

    # Eye offset from the pivot in world coordinates.
    offset = np.array(
        [
            -radius * math.cos(elevation) * math.sin(azimuth),
            -radius * math.sin(elevation),  # -Y is up, so this raises the eye
            -radius * math.cos(elevation) * math.cos(azimuth),
        ],
        np.float64,
    )
    eye = np.asarray(center, np.float64) + offset
    return look_at(eye, center, up)
def sway_azimuths(n: int, amplitude: float = 80.0, cycles: float = 1.0) -> np.ndarray:
    """Keyframed yaw schedule in degrees: 0 -> -amp -> 0 -> +amp -> 0.

    The left / center / right / center swing is repeated ``round(cycles)`` times
    (at least once) and linearly interpolated onto ``max(n, 1)`` evenly spaced
    samples. Mirrors the keyframed yaw cycle used by the repo's ``render_*.py``
    scripts.
    """
    repeats = max(1, int(round(cycles)))
    one_swing = [-amplitude, 0.0, amplitude, 0.0]
    keyframes = np.array([0.0] + one_swing * repeats, np.float64)
    knots = np.linspace(0, 1, len(keyframes))
    samples = np.linspace(0, 1, max(n, 1))
    return np.interp(samples, knots, keyframes)
def linspace_azimuths(n: int, start: float, stop: float) -> np.ndarray:
    """Evenly spaced azimuths from ``start`` to ``stop`` (inclusive).

    Always returns at least one sample, even when ``n`` is zero or negative.
    """
    count = n if n > 1 else 1
    return np.linspace(start, stop, count)
# --------------------------------------------------------------------------- #
# Rasterizer
# --------------------------------------------------------------------------- #
@torch.no_grad()
def rasterize(points, colors, R, t, K, size: int, radius: int = 2,
              bg=(255, 255, 255), supersample: int = 2,
              device="cuda", return_mask: bool = False):
    """Render a colored point cloud to a ``size x size`` uint8 RGB image.

    Uses a super-sampled z-buffer (nearest point wins per pixel) and average-pool
    down-sampling for anti-aliasing. Every point is splatted as a filled disc.

    Args:
        points: ``(N, 3)`` world-space positions.
        colors: ``(N, 3)`` per-point colors on the same 0-255 scale as ``bg``
            (the result is clamped to ``[0, 255]`` and cast to uint8).
        R, t: World-to-camera rotation ``(3, 3)`` and translation ``(3,)``;
            camera coordinates are ``points @ R.T + t``.
        K: ``(3, 3)`` pinhole intrinsics for the ``size x size`` output image.
        size: Output width and height in pixels.
        radius: Splat radius in output pixels (scaled by ``supersample``; at
            least one super-sampled pixel).
        bg: Background color.
        supersample: Integer super-sampling factor; values below 1 are treated
            as 1.
        device: Torch device used when CUDA is available; otherwise the CPU is
            used regardless of this value.
        return_mask: Also return a boolean coverage mask.

    Returns:
        The ``(size, size, 3)`` uint8 image, or ``(image, mask)`` when
        ``return_mask`` is set, where ``mask`` is ``(size, size)`` bool and true
        wherever at least one super-sampled pixel was covered by a point.
    """
    dev = torch.device(device if torch.cuda.is_available() else "cpu")
    P = torch.as_tensor(np.ascontiguousarray(points), dtype=torch.float32, device=dev)
    C = torch.as_tensor(np.ascontiguousarray(colors), dtype=torch.float32, device=dev)
    if P.numel() == 0:
        img = np.tile(np.array(bg, np.uint8), (size, size, 1))
        return (img, np.zeros((size, size), bool)) if return_mask else img

    # World -> camera.
    Rt = torch.as_tensor(np.asarray(R), dtype=torch.float32, device=dev)
    tt = torch.as_tensor(np.asarray(t), dtype=torch.float32, device=dev)
    Xc = P @ Rt.T + tt
    z = Xc[:, 2]

    # Guard against 0 / negative / fractional factors, which would otherwise
    # produce an empty or non-integer-sized buffer.
    ss = max(1, int(supersample))
    Hs = Ws = size * ss
    Ks = torch.as_tensor(np.asarray(K), dtype=torch.float32, device=dev).clone()
    Ks[:2, :] *= ss  # intrinsics of the super-sampled image

    # Camera -> integer pixel coordinates in the super-sampled image.
    proj = Xc @ Ks.T
    inv = 1.0 / proj[:, 2].clamp(min=1e-6)
    ui = (proj[:, 0] * inv).round().long()
    vi = (proj[:, 1] * inv).round().long()
    front = z > 1e-4  # discard points at or behind the camera plane

    # Disc-shaped splat footprint, in super-sampled pixels.
    rr = max(1, int(round(radius * ss)))
    offs = [(du, dv) for du in range(-rr, rr + 1) for dv in range(-rr, rr + 1)
            if du * du + dv * dv <= rr * rr]

    # One (pixel index, depth, color) candidate per point and footprint offset.
    flat_l, z_l, c_l = [], [], []
    for du, dv in offs:
        uu = ui + du
        vv = vi + dv
        m = front & (uu >= 0) & (uu < Ws) & (vv >= 0) & (vv < Hs)
        flat_l.append((vv * Ws + uu)[m])
        z_l.append(z[m])
        c_l.append(C[m])
    flat = torch.cat(flat_l)
    zc = torch.cat(z_l)
    cc = torch.cat(c_l)

    # Z-buffer: keep the minimum depth per pixel, then paint the candidates
    # that match it (within a small tolerance).
    npix = Hs * Ws
    zbuf = torch.full((npix,), float("inf"), device=dev)
    zbuf.scatter_reduce_(0, flat, zc, reduce="amin", include_self=True)
    winner = zc <= zbuf[flat] + 1e-6
    covered = flat[winner]

    img = torch.empty((npix, 3), dtype=torch.float32, device=dev)
    img[:] = torch.tensor(bg, dtype=torch.float32, device=dev)
    img[covered] = cc[winner]

    # Average-pool each ss x ss block down to one output pixel.
    img = img.reshape(size, ss, size, ss, 3).mean(dim=(1, 3))
    out = img.clamp(0, 255).to(torch.uint8).cpu().numpy()
    if return_mask:
        # Coverage is only materialised when the caller asks for it.
        cov = torch.zeros((npix,), dtype=torch.float32, device=dev)
        cov[covered] = 1.0
        cov = cov.reshape(size, ss, size, ss).mean(dim=(1, 3))
        return out, (cov.cpu().numpy() > 0.0)
    return out
def _flood_white(img, bg=(255, 255, 255), tol=14):
"""Replace near-background pixels (matching the top-left corner) with white."""
corner = img[0, 0].astype(np.int16)
diff = np.abs(img.astype(np.int16) - corner).max(axis=-1)
out = img.copy()
out[diff <= tol] = np.array(bg, np.uint8)
return out
def srgb_to_linear(colors):
    """Decode sRGB colors to linear light (matches render_pred_output.py).

    Input may be on a 0-1 or a 0-255 scale: when any value exceeds 1.0 the
    whole array is divided by 255 first. Values at or below 0.04045 use the
    linear segment (``c / 12.92``); the rest use the 2.4 power curve.

    Returns:
        A float32 array with the same shape as ``colors``.
    """
    rgb = np.asarray(colors, np.float32)
    looks_8bit = rgb.size > 0 and rgb.max() > 1.0
    if looks_8bit:
        rgb = rgb / 255.0

    curve_part = ~(rgb <= 0.04045)
    linear_part = ~curve_part
    result = np.empty_like(rgb)
    result[curve_part] = ((rgb[curve_part] + 0.055) / 1.055) ** 2.4
    result[linear_part] = rgb[linear_part] / 12.92
    return result
def _orbit_extrinsic(center, azimuth_deg):
"""World-to-camera (OpenCV) for a camera orbiting ``center`` about the vertical
(world Y) axis, starting from the *input* camera (identity at azimuth 0).
Rendering with the input intrinsics and this extrinsic therefore overlaps the
input image at azimuth 0 and swings around the face otherwise.
"""
az = math.radians(azimuth_deg)
c = np.asarray(center, np.float64).reshape(3)
ca, sa = math.cos(az), math.sin(az)
Ry = np.array([[ca, 0.0, sa], [0.0, 1.0, 0.0], [-sa, 0.0, ca]], np.float64)
c2w = np.eye(4)
c2w[:3, :3] = Ry
c2w[:3, 3] = c - Ry @ c
return np.linalg.inv(c2w)
def side_by_side(left, right):
    """Horizontally stack two RGB frames (original | prediction).

    If the frames differ in height, the shorter one is rescaled (aspect ratio
    kept) to the taller one's height before stacking. The result is cropped by
    one column when needed so that its width is even, which video codecs
    require.

    OpenCV is only imported when a frame actually has to be rescaled, so
    same-height inputs work without it.

    Returns:
        A C-contiguous array holding ``left`` and ``right`` side by side.
    """
    h = max(left.shape[0], right.shape[0])

    def fit(img):
        """Return ``img`` rescaled to height ``h`` (unchanged if already ``h``)."""
        if img.shape[0] == h:
            return img
        import cv2  # deferred: only the resize path depends on OpenCV
        w = int(round(img.shape[1] * h / img.shape[0]))
        # NOTE(review): h is the max height, so this always enlarges; INTER_AREA
        # is kept to preserve the existing output.
        return cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)

    out = np.concatenate([fit(left), fit(right)], axis=1)
    if out.shape[1] % 2:  # even width for video codecs
        out = out[:, :-1]
    return np.ascontiguousarray(out)
class Renderer:
    """Colored point-cloud renderer that looks through the *input* camera and
    orbits it around the face.

    With the Open3D backend the input intrinsics are reused (scaled to the
    internal resolution), so azimuth 0 reproduces and overlaps the input image
    on a canvas of ``out_w x out_h``. The style follows the repo's
    ``render_pred_output.py``: Open3D ``OffscreenRenderer`` at ``supersample``
    times the output resolution (down-sampled afterwards for anti-aliasing),
    ``defaultUnlit`` shader, ``point_size=8`` by default, sRGB->linear vertex
    colors and the ``bg`` background.

    If Open3D cannot be set up and ``backend="auto"``, rendering falls back to a
    square torch rasterization that is resized to the output canvas.
    ``backend="open3d"`` re-raises the setup error instead; any other value
    selects the torch path directly.
    """

    def __init__(self, out_h, out_w, intrinsics, input_hw, supersample=2,
                 point_size=0.0, device="cuda", bg=(255, 255, 255), backend="auto"):
        # Output canvas and the super-sampled internal canvas.
        self.out_h = int(out_h)
        self.out_w = int(out_w)
        self.ss = max(1, int(supersample))
        self.ih = self.out_h * self.ss
        self.iw = self.out_w * self.ss

        # Scale the input intrinsics to the internal resolution. The factor is
        # taken from the heights only (aspect kept).
        input_to_internal = self.ih / float(input_hw[0])
        scaled_K = np.asarray(intrinsics, np.float64).copy()
        scaled_K[:2, :] *= input_to_internal
        self.K = scaled_K

        self.point_size = 8.0 if not point_size else float(point_size)
        self.bg = bg
        self.device = device
        self.radius_px = max(2, round(self.out_h / 170))  # torch splat radius

        # Start on the torch backend; upgrade to Open3D only if setup succeeds.
        self.backend = "torch"
        self._o3d = None
        self._r = None
        self._mat = None
        if backend not in ("auto", "open3d"):
            return
        try:
            import open3d as o3d
            offscreen = o3d.visualization.rendering.OffscreenRenderer(self.iw, self.ih)
            offscreen.scene.set_background([bg[0] / 255, bg[1] / 255, bg[2] / 255, 1.0])
            material = o3d.visualization.rendering.MaterialRecord()
            material.shader = "defaultUnlit"
            material.point_size = self.point_size
        except Exception:
            # An explicitly requested Open3D backend must not fail silently.
            if backend == "open3d":
                raise
            self.backend = "torch"
        else:
            self._o3d, self._r, self._mat = o3d, offscreen, material
            self.backend = "open3d"

    def render(self, points, colors, center, azimuth):
        """Render one frame of the cloud orbited by ``azimuth`` degrees about ``center``."""
        if self.backend == "open3d":
            return self._render_open3d(points, colors, center, azimuth)
        return self._render_torch(points, colors, center, azimuth)

    def _render_open3d(self, points, colors, center, azimuth):
        """Open3D path: input intrinsics plus an orbit extrinsic, then down-sample."""
        import cv2
        o3d, offscreen = self._o3d, self._r
        offscreen.scene.clear_geometry()
        cloud = o3d.geometry.PointCloud()
        cloud.points = o3d.utility.Vector3dVector(np.ascontiguousarray(points, np.float64))
        cloud.colors = o3d.utility.Vector3dVector(srgb_to_linear(colors).astype(np.float64))
        offscreen.scene.add_geometry("pc", cloud, self._mat)
        extrinsic = _orbit_extrinsic(center, azimuth)
        offscreen.setup_camera(self.K, extrinsic, self.iw, self.ih)
        frame = np.asarray(offscreen.render_to_image())[..., :3]
        if self.ss > 1:
            frame = cv2.resize(frame, (self.out_w, self.out_h), interpolation=cv2.INTER_AREA)
        return _flood_white(frame, self.bg)

    def _render_torch(self, points, colors, center, azimuth):
        """Torch fallback: square synthetic-orbit render, then resize (no exact overlap)."""
        import cv2
        pts = np.asarray(points, np.float64)
        pivot = np.asarray(center, np.float64)
        if len(pts):
            # 90th-percentile distance from the pivot sets the framing scale.
            scale = float(np.percentile(np.linalg.norm(pts - pivot, axis=1), 90))
        else:
            scale = 1.0
        R, t = orbit_camera(pivot, scale, azimuth, dist=scale * 2.6)
        K = render_intrinsics(self.out_h, 35.0)
        square = rasterize(points, colors, R, t, K, self.out_h, radius=self.radius_px,
                           supersample=self.ss, device=self.device)
        return cv2.resize(square, (self.out_w, self.out_h), interpolation=cv2.INTER_AREA)
# --------------------------------------------------------------------------- #
# Video
# --------------------------------------------------------------------------- #
def write_video(frames, path, fps: int = 20):
    """Write a sequence of HxWx3 uint8 frames to an mp4 (libx264, yuv420p).

    The writer is used as a context manager so the output file and the encoder
    are closed even if appending a frame raises.

    Args:
        frames: Iterable of frames; each is made C-contiguous before encoding.
        path: Destination file path.
        fps: Frames per second of the encoded video.

    Returns:
        ``path``, unchanged.
    """
    import imageio.v2 as imageio

    frames = [np.ascontiguousarray(f) for f in frames]
    with imageio.get_writer(path, fps=fps, codec="libx264",
                            quality=8, macro_block_size=8,
                            ffmpeg_params=["-pix_fmt", "yuv420p"]) as writer:
        for frame in frames:
            writer.append_data(frame)
    return path