Spaces:

UmutKocasari
/

FaceAnything

Running on Zero

FaceAnything / src /faceanything /render.py

Umut Kocasari

Add FaceAnything Gradio demo app

4db294e 8 days ago

12.9 kB

	"""Headless point-cloud rasterizer with orbiting cameras and video assembly.

	The built-in rasterizer renders colored point clouds entirely with PyTorch
	tensor ops (super-sampled point splatting resolved by a z-buffer), so it needs
	no OpenGL/EGL/Filament and runs on a CUDA GPU when one is available or on the
	CPU otherwise. The same primitive renders every modality (RGB, depth, normals,
	canonical, tracks) — only the per-point colors change — which is what makes
	the 180-degree "grand tour" morph possible.
	"""
	from __future__ import annotations

	import math

	import numpy as np
	import torch


	# --------------------------------------------------------------------------- #
	# Cameras
	# --------------------------------------------------------------------------- #
	def look_at(eye, center, up):
	    """OpenCV world-to-camera ``R (3,3), t (3,)`` looking from ``eye`` at ``center``.

	    The rows of ``R`` are the camera axes expressed in world coordinates:
	    ``x`` (right), ``y`` (down, i.e. opposite to ``up``) and ``z`` (forward,
	    pointing from ``eye`` toward ``center``); ``t = -R @ eye``. Both are
	    returned as float32.

	    Degenerate inputs are resolved instead of producing NaNs:

	    * if ``eye`` and ``center`` coincide, the camera looks along world +Z;
	    * if ``up`` is zero or (near-)parallel to the view direction, ``up`` is
	      nudged along world X, then Y, then Z until a valid right axis exists.
	    """
	    eye = np.asarray(eye, np.float64)
	    center = np.asarray(center, np.float64)
	    up = np.asarray(up, np.float64)

	    z = center - eye
	    nz = np.linalg.norm(z)
	    if nz < 1e-12:
	        # No view direction at all; pick a deterministic forward axis.
	        z = np.array([0.0, 0.0, 1.0])
	    else:
	        z = z / (nz + 1e-12)

	    x = np.cross(z, up)
	    nx = np.linalg.norm(x)
	    if nx < 1e-8:  # forward parallel to up; nudge
	        # X is tried first so the result matches a plain X nudge whenever that
	        # works; Y and Z cover the case where the view direction itself lies
	        # along X and an X nudge would leave ``up`` parallel to it.
	        for axis in np.eye(3):
	            x = np.cross(z, up + 1e-3 * axis)
	            nx = np.linalg.norm(x)
	            if nx >= 1e-8:
	                break
	    x = x / nx
	    y = np.cross(z, x)
	    R = np.stack([x, y, z], axis=0)
	    t = -R @ eye
	    return R.astype(np.float32), t.astype(np.float32)


	def render_intrinsics(size: int, fov_deg: float = 38.0) -> np.ndarray:
	    """Return the 3x3 float32 pinhole matrix of a square ``size x size`` view.

	    The focal length (in pixels) is derived from the field of view
	    ``fov_deg`` spanning ``size`` pixels; the principal point sits at
	    ``(size / 2, size / 2)``.
	    """
	    half = size * 0.5
	    focal = half / math.tan(math.radians(fov_deg) * 0.5)
	    K = np.eye(3, dtype=np.float32)
	    K[0, 0] = K[1, 1] = focal
	    K[0, 2] = K[1, 2] = half
	    return K


	def scene_bounds(points: np.ndarray):
	    """Bounding-box center and per-axis half-extents of a point set.

	    Uses robust (0.5/99.5 percentile) bounds so a few flying pixels don't inflate
	    the framing, while still including the full head (which is far from those
	    percentiles). Returns ``(center (3,), half_extents (3,))`` as float32.

	    ``points`` may be ``(N, 3)`` or any ``(..., 3)`` array (e.g. an ``H x W x 3``
	    point map); leading axes are flattened. Rows containing NaN/inf are
	    ignored. If nothing usable remains (including a completely empty input
	    such as ``[]``), the fallback ``(zeros(3), ones(3))`` is returned.
	    """
	    fallback = np.zeros(3, np.float32), np.ones(3, np.float32)
	    pts = np.asarray(points, np.float32)
	    if pts.size == 0:
	        # Checked before the axis-wise filter below, which needs a 2-D array
	        # and would otherwise raise on an empty 1-D input.
	        return fallback
	    pts = pts.reshape(-1, pts.shape[-1])
	    pts = pts[np.isfinite(pts).all(axis=1)]
	    if pts.shape[0] == 0:
	        return fallback
	    lo = np.percentile(pts, 0.5, axis=0)
	    hi = np.percentile(pts, 99.5, axis=0)
	    center = (lo + hi) * 0.5
	    # Floor the extents so a flat or single-point cloud never yields a zero box.
	    half_extents = np.maximum((hi - lo) * 0.5, 1e-4)
	    return center.astype(np.float32), half_extents.astype(np.float32)


	def orbit_camera(center, scale, azimuth_deg, elevation_deg=0.0,
	                 dist_factor=2.4, up=(0.0, -1.0, 0.0), dist=None):
	    """World-to-camera ``(R, t)`` for a camera circling ``center`` from outside.

	    At azimuth 0 and elevation 0 the eye sits at ``center - dist * Z`` and
	    looks toward +Z; the azimuth rotates the eye about the vertical (Y) axis
	    and a positive elevation moves it toward -Y (which is "up" with the
	    default ``up=(0, -1, 0)``, since +Y points down in OpenCV).
	    The eye-to-center distance is ``dist`` when given, otherwise
	    ``scale * dist_factor``.
	    """
	    azimuth = math.radians(azimuth_deg)
	    elevation = math.radians(elevation_deg)
	    radius = scale * dist_factor if dist is None else dist

	    ground = math.cos(elevation)  # share of the radius in the X/Z plane
	    offset = np.array(
	        [
	            -radius * ground * math.sin(azimuth),
	            -radius * math.sin(elevation),  # -Y is up, so this raises the eye
	            -radius * ground * math.cos(azimuth),
	        ],
	        np.float64,
	    )
	    eye = np.asarray(center, np.float64) + offset
	    return look_at(eye, center, up)


	def sway_azimuths(n: int, amplitude: float = 80.0, cycles: float = 1.0) -> np.ndarray:
	"""Orbit schedule (deg): 0 -> -amp -> 0 -> +amp -> 0, i.e. swing left, back to
	center, right, and back — repeated ``cycles`` times. Mirrors the keyframed
	yaw cycle used by the repo's ``render_*.py`` scripts.
	"""
	key = [0.0]
	for _ in range(max(1, int(round(cycles)))):
	key += [-amplitude, 0.0, amplitude, 0.0]
	key = np.array(key, np.float64)
	key_pos = np.linspace(0, 1, len(key))
	return np.interp(np.linspace(0, 1, max(n, 1)), key_pos, key)


	def linspace_azimuths(n: int, start: float, stop: float) -> np.ndarray:
	    """Evenly spaced azimuths from ``start`` to ``stop`` inclusive (at least one sample)."""
	    count = n if n > 1 else 1
	    return np.linspace(start, stop, num=count)


	# --------------------------------------------------------------------------- #
	# Rasterizer
	# --------------------------------------------------------------------------- #
	@torch.no_grad()
	def rasterize(points, colors, R, t, K, size: int, radius: int = 2,
	              bg=(255, 255, 255), supersample: int = 2,
	              device="cuda", return_mask: bool = False):
	    """Splat a colored point cloud into a ``size x size`` uint8 RGB image.

	    Points are transformed with the world-to-camera pose ``(R, t)``, projected
	    with ``K`` onto a grid ``supersample`` times larger than the output, and
	    drawn as discs of ``radius`` output pixels. A z-buffer keeps the nearest
	    fragment per pixel, and the grid is then box-averaged down to the output
	    size for anti-aliasing. The result is clamped to 0..255, so ``colors``
	    are taken to be on that scale.

	    Runs on ``device`` when CUDA is available, otherwise on the CPU. With
	    ``return_mask=True`` the result is ``(image, mask)`` where ``mask`` is a
	    ``size x size`` bool array marking output pixels touched by any point.
	    An empty cloud yields a plain ``bg`` image (and an all-False mask).
	    """
	    target = torch.device(device if torch.cuda.is_available() else "cpu")

	    def to_f32(array):
	        return torch.as_tensor(array, dtype=torch.float32, device=target)

	    xyz = to_f32(np.ascontiguousarray(points))
	    rgb = to_f32(np.ascontiguousarray(colors))
	    if xyz.numel() == 0:
	        blank = np.tile(np.array(bg, np.uint8), (size, size, 1))
	        if return_mask:
	            return blank, np.zeros((size, size), bool)
	        return blank
	    rot = to_f32(np.asarray(R))
	    trans = to_f32(np.asarray(t))

	    # World -> camera, then project with intrinsics scaled to the big grid.
	    cam = xyz @ rot.T + trans
	    depth = cam[:, 2]
	    ss = supersample
	    grid_h = grid_w = size * ss
	    K_big = to_f32(np.asarray(K)).clone()
	    K_big[:2, :] *= ss
	    homog = cam @ K_big.T
	    inv_w = 1.0 / homog[:, 2].clamp(min=1e-6)
	    px = (homog[:, 0] * inv_w).round().long()
	    py = (homog[:, 1] * inv_w).round().long()
	    in_front = depth > 1e-4

	    # Integer offsets covering a disc of the splat radius (in grid pixels).
	    splat = max(1, int(round(radius * ss)))
	    span = range(-splat, splat + 1)
	    disc = [(du, dv) for du in span for dv in span
	            if du * du + dv * dv <= splat * splat]

	    # One fragment per (point, offset) pair that lands inside the grid.
	    pix_parts, depth_parts, rgb_parts = [], [], []
	    for du, dv in disc:
	        u = px + du
	        v = py + dv
	        keep = in_front & (u >= 0) & (u < grid_w) & (v >= 0) & (v < grid_h)
	        pix_parts.append((v * grid_w + u)[keep])
	        depth_parts.append(depth[keep])
	        rgb_parts.append(rgb[keep])
	    frag_pix = torch.cat(pix_parts)
	    frag_depth = torch.cat(depth_parts)
	    frag_rgb = torch.cat(rgb_parts)

	    # Depth test: per-pixel minimum, then keep the fragments that achieve it.
	    n_pix = grid_h * grid_w
	    zbuf = torch.full((n_pix,), float("inf"), device=target)
	    zbuf.scatter_reduce_(0, frag_pix, frag_depth, reduce="amin", include_self=True)
	    visible = frag_depth <= zbuf[frag_pix] + 1e-6
	    hit = frag_pix[visible]

	    canvas = torch.empty((n_pix, 3), dtype=torch.float32, device=target)
	    canvas[:] = torch.tensor(bg, dtype=torch.float32, device=target)
	    canvas[hit] = frag_rgb[visible]
	    coverage = torch.zeros((n_pix,), dtype=torch.float32, device=target)
	    coverage[hit] = 1.0

	    # Box-filter each ss x ss cell down to one output pixel.
	    canvas = canvas.reshape(size, ss, size, ss, 3).mean(dim=(1, 3))
	    out = canvas.clamp(0, 255).to(torch.uint8).cpu().numpy()
	    if not return_mask:
	        return out
	    coverage = coverage.reshape(size, ss, size, ss).mean(dim=(1, 3))
	    return out, (coverage.cpu().numpy() > 0.0)


	def _flood_white(img, bg=(255, 255, 255), tol=14):
	    """Return a copy of ``img`` with near-background pixels repainted as ``bg``.

	    The top-left pixel is taken as the background reference. Every pixel whose
	    largest per-channel difference from it is at most ``tol`` is replaced,
	    wherever it lies in the image (a global threshold, not a connected fill).
	    """
	    reference = img[0, 0].astype(np.int16)
	    # int16 avoids uint8 wrap-around when subtracting.
	    channel_gap = np.abs(img.astype(np.int16) - reference)
	    is_background = channel_gap.max(axis=-1) <= tol
	    result = img.copy()
	    result[is_background] = np.array(bg, np.uint8)
	    return result


	def srgb_to_linear(colors):
	    """Decode sRGB colors to linear light (matches render_pred_output.py).

	    Input may be on a 0-1 or a 0-255 scale: if any value exceeds 1.0 the whole
	    array is divided by 255 first. Returns a float32 array of the same shape,
	    using the piecewise sRGB curve (linear segment up to 0.04045, 2.4 power
	    law above it).
	    """
	    srgb = np.asarray(colors, np.float32)
	    if srgb.size and srgb.max() > 1.0:
	        srgb = srgb / 255.0
	    toe = srgb / 12.92
	    gamma = ((srgb + 0.055) / 1.055) ** 2.4
	    return np.where(srgb <= 0.04045, toe, gamma)


	def _orbit_extrinsic(center, azimuth_deg):
	    """4x4 world-to-camera (OpenCV) matrix for a camera swung around ``center``.

	    The camera starts at the input view (identity at azimuth 0) and is rotated
	    by ``azimuth_deg`` about the axis through ``center`` parallel to world Y,
	    so ``center`` keeps the same camera-space coordinates at every azimuth.

	    Rendering with the input intrinsics and this extrinsic therefore overlaps
	    the input image at azimuth 0 and swings around the face otherwise.
	    """
	    theta = math.radians(azimuth_deg)
	    cos_t, sin_t = math.cos(theta), math.sin(theta)
	    pivot = np.asarray(center, np.float64).reshape(3)
	    rot_y = np.array(
	        [[cos_t, 0.0, sin_t],
	         [0.0, 1.0, 0.0],
	         [-sin_t, 0.0, cos_t]],
	        np.float64,
	    )
	    # Camera-to-world pose: rotate about the pivot, i.e. x -> R (x - c) + c.
	    cam_to_world = np.eye(4)
	    cam_to_world[:3, 3] = pivot - rot_y @ pivot
	    cam_to_world[:3, :3] = rot_y
	    return np.linalg.inv(cam_to_world)


	def side_by_side(left, right):
	    """Horizontally stack two RGB frames (original | prediction).

	    If the heights differ, the shorter frame is rescaled (aspect ratio kept)
	    to the taller one's height with OpenCV area interpolation. OpenCV is
	    only imported when such a rescale is actually needed. If the combined
	    width is odd, the last column is dropped so video codecs that require
	    even dimensions accept the frame. Returns a C-contiguous array.
	    """
	    h = max(left.shape[0], right.shape[0])

	    def fit(img):
	        if img.shape[0] == h:
	            return img
	        import cv2  # deferred: equal-height frames need no OpenCV
	        # Keep at least one column so very narrow frames remain resizable.
	        w = max(1, int(round(img.shape[1] * h / img.shape[0])))
	        return cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)

	    out = np.concatenate([fit(left), fit(right)], axis=1)
	    if out.shape[1] % 2:  # even width for video codecs
	        out = out[:, :-1]
	    return np.ascontiguousarray(out)


	class Renderer:
	    """Colored point-cloud renderer that views the cloud through the input
	    camera and orbits around the face.

	    It uses the input intrinsics (so azimuth 0 reproduces — and overlaps — the
	    input image) on a per-clip canvas matching the input aspect ratio. Rendering
	    style mirrors the repo's ``render_pred_output.py``: Open3D OffscreenRenderer at
	    a 2x internal resolution (down-sampled for anti-aliasing), ``defaultUnlit``,
	    ``point_size=8``, sRGB->linear vertex colors, white background. Falls back to a
	    square torch rasterization if headless GL is unavailable.

	    Attributes set by ``__init__``:
	        out_h, out_w: output frame size in pixels.
	        ss: internal super-sampling factor (>= 1).
	        ih, iw: internal render size (``out_* * ss``).
	        K: input intrinsics rescaled to the internal render size (float64).
	        backend: ``"open3d"`` or ``"torch"`` — the backend actually in use.
	        backend_error: the exception that made ``backend="auto"`` fall back
	            to torch, or ``None``.
	    """

	    def __init__(self, out_h, out_w, intrinsics, input_hw, supersample=2,
	                 point_size=0.0, device="cuda", bg=(255, 255, 255), backend="auto"):
	        """Set up the render canvas and pick a backend.

	        ``intrinsics`` is the 3x3 matrix of the input image whose
	        ``(height, width)`` is ``input_hw``. ``backend`` is ``"auto"`` (try
	        Open3D, silently fall back to torch), ``"open3d"`` (re-raise if Open3D
	        cannot be initialised) or anything else for the torch rasterizer. A
	        falsy ``point_size`` selects the default of 8.
	        """
	        self.out_h, self.out_w = int(out_h), int(out_w)
	        self.ss = max(1, int(supersample))
	        self.ih, self.iw = self.out_h * self.ss, self.out_w * self.ss
	        s = self.ih / float(input_hw[0])  # input -> internal scale (aspect kept)
	        K = np.asarray(intrinsics, np.float64).copy()
	        K[:2, :] *= s
	        self.K = K
	        self.point_size = float(point_size) if point_size else 8.0
	        self.bg = bg
	        self.device = device
	        self.radius_px = max(2, round(self.out_h / 170))
	        self.backend = "torch"
	        self.backend_error = None
	        self._o3d = self._r = self._mat = None
	        if backend in ("auto", "open3d"):
	            try:
	                import open3d as o3d
	                r = o3d.visualization.rendering.OffscreenRenderer(self.iw, self.ih)
	                r.scene.set_background([bg[0] / 255, bg[1] / 255, bg[2] / 255, 1.0])
	                mat = o3d.visualization.rendering.MaterialRecord()
	                mat.shader = "defaultUnlit"
	                mat.point_size = self.point_size
	                self._o3d, self._r, self._mat = o3d, r, mat
	                self.backend = "open3d"
	            except Exception as exc:
	                # Deliberately broad: a missing package and a failed headless
	                # GL context both mean "use the torch rasterizer" in auto mode.
	                if backend == "open3d":
	                    raise
	                self.backend_error = exc  # kept so callers can see why

	    def render(self, points, colors, center, azimuth):
	        """Render one ``out_h x out_w`` RGB frame at orbit angle ``azimuth`` (deg).

	        ``colors`` may be on a 0-1 or a 0-255 scale with either backend.
	        With Open3D the cloud is seen through the input camera rotated about
	        ``center``; with the torch fallback a synthetic square orbit view is
	        rendered and resized to the output size, so it does not exactly
	        overlap the input image.
	        """
	        if self.backend == "open3d":
	            o3d, r = self._o3d, self._r
	            r.scene.clear_geometry()
	            pcd = o3d.geometry.PointCloud()
	            pcd.points = o3d.utility.Vector3dVector(np.ascontiguousarray(points, np.float64))
	            pcd.colors = o3d.utility.Vector3dVector(srgb_to_linear(colors).astype(np.float64))
	            r.scene.add_geometry("pc", pcd, self._mat)
	            ext = _orbit_extrinsic(center, azimuth)
	            r.setup_camera(self.K, ext, self.iw, self.ih)
	            img = np.asarray(r.render_to_image())[..., :3]
	            if self.ss > 1:
	                import cv2
	                img = cv2.resize(img, (self.out_w, self.out_h), interpolation=cv2.INTER_AREA)
	            return _flood_white(img, self.bg)
	        # torch fallback: square synthetic-orbit render, then resize (no exact overlap)
	        import cv2
	        pts = np.asarray(points, np.float64)
	        c = np.asarray(center, np.float64)
	        scale = float(np.percentile(np.linalg.norm(pts - c, axis=1), 90)) if len(pts) else 1.0
	        # The rasterizer works on a 0-255 scale; accept 0-1 colors too, using
	        # the same "max <= 1" test that srgb_to_linear applies on the Open3D path.
	        cols = np.asarray(colors, np.float32)
	        if cols.size and cols.max() <= 1.0:
	            cols = cols * 255.0
	        R, t = orbit_camera(c, scale, azimuth, dist=scale * 2.6)
	        K = render_intrinsics(self.out_h, 35.0)
	        # Forward the configured background so both backends honour ``bg``.
	        img = rasterize(points, cols, R, t, K, self.out_h, radius=self.radius_px,
	                        bg=self.bg, supersample=self.ss, device=self.device)
	        return cv2.resize(img, (self.out_w, self.out_h), interpolation=cv2.INTER_AREA)



	# --------------------------------------------------------------------------- #
	# Video
	# --------------------------------------------------------------------------- #
	def write_video(frames, path, fps: int = 20):
	    """Write a list of HxWx3 uint8 frames to an mp4 (libx264, yuv420p).

	    ``frames`` is any iterable of arrays; each is made C-contiguous before
	    encoding. Returns ``path``. The writer is always closed, even if encoding
	    a frame fails, so the ffmpeg process and file handle are not leaked.
	    """
	    import imageio.v2 as imageio
	    frames = [np.ascontiguousarray(f) for f in frames]
	    writer = imageio.get_writer(path, fps=fps, codec="libx264",
	                                quality=8, macro_block_size=8,
	                                ffmpeg_params=["-pix_fmt", "yuv420p"])
	    try:
	        for f in frames:
	            writer.append_data(f)
	    finally:
	        writer.close()
	    return path