text-to-3d-2.0

Paused

App Files Files Community

text-to-3d-2.0 / spar3d /models /global_estimator /reni_estimator.py

mboss

Update demo with latest changes

64fccd8 12 months ago

raw

history blame contribute delete

3.58 kB

	from dataclasses import dataclass, field
	from typing import Any, Optional

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from jaxtyping import Float
	from torch import Tensor

	from spar3d.models.illumination.reni.env_map import RENIEnvMap
	from spar3d.models.utils import BaseModule


	def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
	assert d6.shape[-1] == 6, "Input tensor must have shape (..., 6)"

	def proj_u2a(u, a):
	r"""
	u: batch x 3
	a: batch x 3
	"""
	inner_prod = torch.sum(u * a, dim=-1, keepdim=True)
	norm2 = torch.sum(u**2, dim=-1, keepdim=True)
	norm2 = torch.clamp(norm2, min=1e-8)
	factor = inner_prod / (norm2 + 1e-10)
	return factor * u

	x_raw, y_raw = d6[..., :3], d6[..., 3:]

	x = F.normalize(x_raw, dim=-1)
	y = F.normalize(y_raw - proj_u2a(x, y_raw), dim=-1)
	z = torch.cross(x, y, dim=-1)

	return torch.stack((x, y, z), dim=-1)


	class ReniLatentCodeEstimator(BaseModule):
	@dataclass
	class Config(BaseModule.Config):
	triplane_features: int = 40

	n_layers: int = 5
	hidden_features: int = 512
	activation: str = "relu"

	pool: str = "mean"

	reni_env_config: dict = field(default_factory=dict)

	cfg: Config

	def configure(self):
	layers = []
	cur_features = self.cfg.triplane_features * 3
	for _ in range(self.cfg.n_layers):
	layers.append(
	nn.Conv2d(
	cur_features,
	self.cfg.hidden_features,
	kernel_size=3,
	padding=0,
	stride=2,
	)
	)
	layers.append(self.make_activation(self.cfg.activation))

	cur_features = self.cfg.hidden_features

	self.layers = nn.Sequential(*layers)

	self.reni_env_map = RENIEnvMap(self.cfg.reni_env_config)
	self.latent_dim = self.reni_env_map.field.latent_dim

	self.fc_latents = nn.Linear(self.cfg.hidden_features, self.latent_dim * 3)
	nn.init.normal_(self.fc_latents.weight, mean=0.0, std=0.3)

	self.fc_rotations = nn.Linear(self.cfg.hidden_features, 6)
	nn.init.constant_(self.fc_rotations.bias, 0.0)
	nn.init.normal_(
	self.fc_rotations.weight, mean=0.0, std=0.01
	) # Small variance here

	self.fc_scale = nn.Linear(self.cfg.hidden_features, 1)
	nn.init.constant_(self.fc_scale.bias, 0.0)
	nn.init.normal_(self.fc_scale.weight, mean=0.0, std=0.01) # Small variance here

	def make_activation(self, activation):
	if activation == "relu":
	return nn.ReLU(inplace=True)
	elif activation == "silu":
	return nn.SiLU(inplace=True)
	else:
	raise NotImplementedError

	def forward(
	self,
	triplane: Float[Tensor, "B 3 F Ht Wt"],
	rotation: Optional[Float[Tensor, "B 3 3"]] = None,
	) -> dict[str, Any]:
	x = self.layers(
	triplane.reshape(
	triplane.shape[0], -1, triplane.shape[-2], triplane.shape[-1]
	)
	)
	x = x.mean(dim=[-2, -1])

	latents = self.fc_latents(x).reshape(-1, self.latent_dim, 3)
	rotations = rotation_6d_to_matrix(self.fc_rotations(x))
	scale = self.fc_scale(x)

	if rotation is not None:
	rotations = rotations @ rotation.to(dtype=rotations.dtype)

	env_map = self.reni_env_map(latents, rotations, scale)

	return {"illumination": env_map["rgb"]}