# Web-page residue from the hosting UI, commented out so the module parses:
# alexnasa's picture
# Update src/pixel3dmm/tracking/tracker.py
# 219e22c verified
import shutil
import mediapy
from PIL import Image, ImageDraw
import os.path
from enum import Enum
from pathlib import Path
import wandb
import time
import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import trimesh
from pytorch3d.io import load_obj
from pytorch3d.ops import knn_points, knn_gather
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from torchvision.transforms.functional import gaussian_blur
from time import time
import pyvista as pv
import dreifus
from dreifus.matrix import Pose, Intrinsics, CameraCoordinateConvention, PoseType
from dreifus.pyvista import add_camera_frustum, render_from_camera
from pixel3dmm import env_paths
from pixel3dmm.tracking import util
from pixel3dmm.tracking.losses import UVLoss
from pixel3dmm.tracking import nvdiffrast_util
from pixel3dmm.tracking.renderer_nvdiffrast import NVDRenderer
from pixel3dmm import env_paths
from pixel3dmm.tracking.flame.FLAME import FLAME
from pixel3dmm.utils.misc import tensor2im
from pixel3dmm.utils.utils_3d import rotation_6d_to_matrix, matrix_to_rotation_6d, euler_angles_to_matrix
from pixel3dmm.utils.drawing import plot_points
def timeit(t0, tag):
    """Profiling stub: return the current time.

    The elapsed-time print for *tag* is disabled; callers chain the return
    value as the next ``t0``.
    """
    now = time()
    # print(f'[PROFILER]: {tag} took {now - t0} seconds')
    return now
# Allow OpenCV to read/write EXR images (must be set before cv2 touches EXR files).
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
# Fixed seed for reproducible tracking runs.
rank = 42
torch.manual_seed(rank)
torch.cuda.manual_seed(rank)
cudnn.benchmark = True  # autotune conv kernels; input sizes are fixed per run
np.random.seed(rank)
# Identity rotation (1, 3, 3) and its 6D representation, used as regularization targets below.
I = torch.eye(3)[None].cuda().detach()
I6D = matrix_to_rotation_6d(I)
# Iris vertex indices on the FLAME mesh and the matching MediaPipe landmark
# indices (left/right eye); only the first FLAME index per eye is used in losses.
left_iris_flame = [4597, 4542, 4510, 4603, 4570]
right_iris_flame = [4051, 3996, 3964, 3932, 4028]
left_iris_mp = [468, 469, 470, 471, 472]
right_iris_mp = [473, 474, 475, 476, 477]
# Trade a little float32 matmul precision for speed (TF32 on Ampere+).
torch.set_float32_matmul_precision('high')
class View(Enum):
    """Visualization channels for checkpoint rendering.

    Values are powers of two (flag-like) -- presumably so views could be
    combined bitwise, though call sites visible here pass lists of members
    instead; TODO confirm.
    """
    GROUND_TRUTH = 1
    COLOR_OVERLAY = 2
    SHAPE_OVERLAY = 4
    SHAPE = 8
    LANDMARKS = 16
    HEATMAP = 32
    DEPTH = 64
def get_intrinsics(focal_length, principal_point, use_hack : bool = True, size : int = 512):
    """Build a batch of 3x3 pixel-space intrinsics matrices on the GPU.

    Args:
        focal_length: (B, 1) focal length normalized by image size -- assumed
            from ``create_parameters`` init; confirm for other callers.
        principal_point: (B, 2) principal-point offset in normalized units,
            mapped to pixels around the image center.
        use_hack: mirror cx horizontally (``size - cx``); flagged TODO by the
            original authors.
        size: square image resolution in pixels.

    Returns:
        (B, 3, 3) float32 CUDA tensor.
    """
    intrinsics = torch.eye(3)[None, ...].float().cuda().repeat(focal_length.shape[0], 1, 1)
    intrinsics[:, 0, 0] = focal_length.squeeze() * size
    intrinsics[:, 1, 1] = focal_length.squeeze() * size
    # principal point mapped from normalized units to pixel coordinates
    intrinsics[:, :2, 2] = size/2+0.5 + principal_point * (size/2+0.5)
    if use_hack:
        intrinsics[:, 0:1, 2:3] = size - intrinsics[:, 0:1, 2:3]  # TODO fix this hack
    return intrinsics
def get_extrinsics(R_base, t_base, timestep: int = 0):
    """Assemble a 4x4 OpenGL-convention world-to-camera matrix.

    Generalized: the timestep was previously hard-coded to 0; it is now a
    parameter whose default preserves the old behavior for all callers.

    Args:
        R_base: mapping/sequence of (1, 3, 3) rotation tensors keyed by timestep.
        t_base: mapping/sequence of (1, 3) translation tensors keyed by timestep.
        timestep: which camera entry to use.

    Returns:
        (1, 4, 4) float32 CUDA tensor.
    """
    w2c_openGL = torch.eye(4)[None, ...].float().cuda()
    w2c_openGL[:, :3, :3] = R_base[timestep]
    w2c_openGL[:, :3, 3] = t_base[timestep]
    return w2c_openGL
def project_points_screen_space(points3d, focal_length, principal_point, R_base, t_base, size : int = 512):
    """Project world-space 3D points into pixel/screen space.

    Args:
        points3d: (B, N, 3) world-space points.
        focal_length, principal_point: normalized camera intrinsics (see
            ``get_intrinsics``).
        R_base, t_base: camera extrinsics dicts; only timestep 0 is used here.
        size: square image resolution in pixels.

    Returns:
        (B, N, 3) tensor: x, y in pixels (x mirrored to ``size - 1 - x``,
        matching the cx hack in ``get_intrinsics``) and camera-space z in the
        last channel. The sign flips account for the OpenGL-style camera
        looking down -z -- assumed; confirm against the renderer convention.
    """
    # construct camera matrices
    intrinsics = get_intrinsics(focal_length, principal_point, size=size)
    w2c_openGL = get_extrinsics(R_base, t_base).repeat(focal_length.shape[0], 1, 1)
    B = points3d.shape[0]
    # broadcast whichever of extrinsics/intrinsics is unbatched to batch size B
    reps_extr = B if w2c_openGL.shape[0] == 1 else 1
    reps_intr = B if intrinsics.shape[0] == 1 else 1
    # apply w2c transformation (homogeneous coordinates)
    lmk68_cam_space = torch.bmm(
        torch.cat([points3d, torch.ones_like(points3d[..., :1])], dim=-1),
        w2c_openGL.permute(0, 2, 1).repeat(reps_extr, 1, 1))
    # project from cam_space to screen_space (perspective divide by -z)
    lmk68_cam_space_prime = lmk68_cam_space[..., :3] / -lmk68_cam_space[..., [2]]
    lmk68_screen_space = (-1) * torch.bmm(lmk68_cam_space_prime, intrinsics.permute(0, 2, 1).repeat(reps_intr, 1, 1))[..., :2]
    lmk68_screen_space = torch.stack([size - 1 - lmk68_screen_space[..., 0], lmk68_screen_space[..., 1], lmk68_cam_space[..., 2]], dim=-1)
    return lmk68_screen_space
# Index map selecting the 68 iBUG landmarks out of the 98 WFLW landmarks.
WFLW_2_iBUG68 = np.array(
    [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 46, 51,
     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 64, 65, 67, 68, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82,
     83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95])
WFLW_2_iBUG68 = torch.from_numpy(WFLW_2_iBUG68).cuda()
# Compile the projection hot path with torch.compile when enabled; the same
# flag also gates compilation inside Tracker.
COMPILE = True
if COMPILE:
    project_points_screen_space = torch.compile(project_points_screen_space)
class Tracker(object):
    def __init__(self, config, flame_module, renderer,
                 device='cuda:0',
                 ):
        """Set up a tracking run for one video.

        Args:
            config: run configuration (video_name, batch_size, loss weights,
                output options, ...).
            flame_module: FLAME head-model module used for mesh generation.
            renderer: differentiable (nvdiffrast) renderer instance.
            device: torch device string for the optimization tensors.
        """
        self.config = config
        self.flame = flame_module
        self.diff_renderer = renderer
        self.config = config  # NOTE(review): duplicate of the assignment above; harmless
        self.device = device
        self.actor_name = self.config.video_name
        DATA_FOLDER = f'{env_paths.PREPROCESSED_DATA}/{self.actor_name}'
        # frames available after start_frame, capped at 1000
        self.MAX_STEPS = min(len([f for f in os.listdir(f'{DATA_FOLDER}/cropped/') if f.endswith('.jpg') or f.endswith('.png')]) - self.config.start_frame, 1000)
        self.FRAME_SKIP = 1
        self.BATCH_SIZE = self.config.batch_size
        print(f'''
<<<<<<<< INITIALIZING TRACKER INSTANCE FOR {self.actor_name} >>>>>>>>
''')
        # vertex index permutation that mirrors the FLAME mesh in x
        self.mirror_order = torch.from_numpy(np.load(f'{env_paths.MIRROR_INDEX}')).long().cuda()
        self.uv_loss_fn = UVLoss(stricter_mask=self.config.uv_loss.stricter_uv_mask,
                                 delta_uv=self.config.uv_loss.delta_uv,
                                 dist_uv=self.config.uv_loss.dist_uv)
        if COMPILE:
            self.uv_loss_fn.compute_loss = torch.compile(self.uv_loss_fn.compute_loss)
        # encode the run/ablation options into the output folder name
        self.actor_name = self.actor_name + f'_nV{config.num_views}'
        if config.no_lm:
            self.actor_name = self.actor_name + '_noLM'
        if config.no_pho:
            self.actor_name = self.actor_name + '_noPho'
        if self.config.ignore_mica:
            self.actor_name = self.actor_name + '_noMICA'
        if self.config.flame2023:
            self.actor_name = self.actor_name + '_FLAME23'
        if self.config.uv_map_super > 0:
            self.actor_name = self.actor_name + f'_uv{self.config.uv_map_super}'
        if self.config.normal_super > 0:
            self.actor_name = self.actor_name + f'_n{self.config.normal_super}'
        if self.config.normal_super_can > 0:
            self.actor_name = self.actor_name + f'_nc{self.config.normal_super_can}'
        self.global_step = 0
        self.no_sh = config.no_sh
        self.no_lm = config.no_lm
        self.no_pho = config.no_pho
        # Latter will be set up
        self.frame = 0
        self.is_initializing = False
        self.image_size = torch.tensor([[config.image_size[0], config.image_size[1]]]).cuda()
        if hasattr(self.config, 'output_folder'):
            self.save_folder = self.config.output_folder
        else:
            self.save_folder = env_paths.TRACKING_OUTPUT
        self.output_folder = os.path.join(self.save_folder, self.actor_name)
        self.checkpoint_folder = os.path.join(self.save_folder, self.actor_name, "checkpoint")
        # NOTE(review): mesh_folder uses the raw video_name while checkpoints use
        # the suffixed actor_name -- confirm this asymmetry is intended
        self.mesh_folder = os.path.join(self.save_folder, self.config.video_name, "mesh")
        self.create_output_folders()
        self.writer = SummaryWriter(log_dir=self.save_folder + self.actor_name + '/logs')
        # per-timestep camera poses (nvdiffrast convention) and their R/t parts
        self.cam_pose_nvd = {}
        self.R_base = {}
        self.t_base = {}
        # FLAME region masks; keep the vertex indices of the face region
        flame_mesh_mask = np.load(f'{env_paths.FLAME_MASK_ASSET}/FLAME2020/FLAME_masks/FLAME_masks.pkl', allow_pickle=True, encoding='latin1')
        self.vertex_face_mask = torch.from_numpy(flame_mesh_mask['face']).cuda().long()
        self.setup_renderer()
        # per-frame results accumulated during the sequential stage
        self.intermediate_exprs = []
        self.intermediate_Rs = []
        self.intermediate_ts = []
        self.intermediate_eyes = []
        self.intermediate_eyelids = []
        self.intermediate_jaws = []
        self.intermediate_necks = []
        self.intermediate_fls = []
        self.intermediate_pps = []
        self.cached_data = {}
def get_image_size(self):
return self.image_size[0][0].item(), self.image_size[0][1].item()
def create_output_folders(self):
Path(self.save_folder).mkdir(parents=True, exist_ok=True)
Path(self.checkpoint_folder).mkdir(parents=True, exist_ok=True)
Path(self.mesh_folder).mkdir(parents=True, exist_ok=True)
    def setup_renderer(self):
        """Finalize renderer/FLAME wiring and (optionally) torch.compile hot paths."""
        mesh_file = f'{env_paths.head_template}'
        self.config.image_size = self.get_image_size()
        self.flame.vertex_face_mask = self.vertex_face_mask
        if COMPILE:
            # compile the FLAME forward and the two halves of the optimization step
            self.flame = torch.compile(self.flame)
            self.opt_pre = torch.compile(self.opt_pre)
            self.opt_post = torch.compile(self.opt_post)
            self.actual_smooth = torch.compile(self.actual_smooth)
        self.renderer = self.diff_renderer  # already global
        # faces index tensor from the template OBJ (pytorch3d load_obj -> (verts, faces, aux))
        self.faces = load_obj(mesh_file)[1]
    def save_checkpoint(self, frame_id, selected_frames = None):
        """Serialize FLAME + camera state for *frame_id* to a ``.frame`` file,
        plus optional meshes and landmark arrays.

        Args:
            frame_id: frame index used in output filenames.
            selected_frames: None in per-frame mode (attributes are tensors);
                otherwise frame indices used to query the per-frame
                embedding-style modules (joint mode).
        """
        if selected_frames is None:
            exp = self.exp
            eyes = self.eyes
            eyelids = self.eyelids
            R = self.R
            t = self.t
            jaw = self.jaw
            neck = self.neck
            focal_length = self.focal_length
            principal_point = self.principal_point
        else:
            exp = self.exp(selected_frames)
            eyes = self.eyes(selected_frames)
            eyelids = self.eyelids(selected_frames)
            R = self.R(selected_frames)
            t = self.t(selected_frames)
            jaw = self.jaw(selected_frames)
            neck = self.neck(selected_frames)
            if self.config.global_camera:
                focal_length = self.focal_length
                principal_point = self.principal_point
            else:
                focal_length = self.focal_length(selected_frames)
                principal_point = self.principal_point(selected_frames)
        # everything is detached to CPU numpy so the checkpoint is torch-loadable anywhere
        frame = {
            'flame': {
                'exp': exp.clone().detach().cpu().numpy(),
                'shape': self.shape.clone().detach().cpu().numpy(),
                'eyes': eyes.clone().detach().cpu().numpy(),
                'eyelids': eyelids.clone().detach().cpu().numpy(),
                'jaw': jaw.clone().detach().cpu().numpy(),
                'neck': neck.clone().detach().cpu().numpy(),
                'R': R.clone().detach().cpu().numpy(),
                'R_rotation_matrix': rotation_6d_to_matrix(R).detach().cpu().numpy(),
                't': t.clone().detach().cpu().numpy(),
            },
            'img_size': self.image_size.clone().detach().cpu().numpy()[0],
            'frame_id': frame_id,
            'global_step': self.global_step
        }
        cam_params = {
            f'R_base_{serial}': self.R_base[serial].clone().detach().cpu().numpy() for serial in self.R_base.keys()
        }
        cam_pos = {
            f't_base_{serial}': self.t_base[serial].clone().detach().cpu().numpy() for serial in self.R_base.keys()
        }
        intr = {
            'fl': focal_length.clone().detach().cpu().numpy(),
            'pp': principal_point.clone().detach().cpu().numpy(),
        }
        cam_params.update(cam_pos)
        cam_params.update(intr)
        frame.update(
            {
                f'camera': cam_params
            }
        )
        bs = exp.shape[0]
        # re-evaluate FLAME to export posed meshes / landmarks for this state
        vertices, lmks, joint_transforms, vertices_can, vertices_noneck = self.flame(cameras=torch.inverse(self.R_base[0])[:1, ...].repeat(bs, 1, 1),
                                                                                     shape_params=self.shape[:1, ...].repeat(bs, 1),
                                                                                     expression_params=exp,
                                                                                     eye_pose_params=eyes,
                                                                                     jaw_pose_params=jaw,
                                                                                     neck_pose_params=neck,
                                                                                     rot_params_lmk_shift=R,
                                                                                     eyelid_params=eyelids,
                                                                                     )
        frame.update(
            {
                f'joint_transforms': joint_transforms.detach().cpu().numpy(),
            }
        )
        f = self.diff_renderer.faces[0].cpu().numpy()
        for b_i in range(bs):
            v = vertices[b_i].cpu().numpy()
            if self.config.save_meshes:
                trimesh.Trimesh(faces=f, vertices=v, process=False).export(f'{self.mesh_folder}/{frame_id:05d}.glb')
            # NOTE(review): written once per b_i to the same path -- redundant but harmless
            torch.save(frame, f'{self.checkpoint_folder}/{frame_id:05d}.frame')
            # 7-landmark subset (eye corners, nose tip, mouth corners) -- presumably NoW-style; confirm
            selction_indx = np.array([36, 39, 42, 45, 33, 48, 54])
            _lmks = lmks[b_i].detach().squeeze().cpu().numpy()
            if self.config.save_landmarks:
                np.save(f'{self.mesh_folder}/landmarks_{frame_id}_{b_i}.npy', _lmks[selction_indx])
        if frame_id == self.config.start_frame and self.config.save_meshes:
            faces = self.diff_renderer.faces[0].cpu().numpy()
            trimesh.Trimesh(faces=faces, vertices=vertices_can[0].detach().cpu().numpy(), process=False).export(f'{self.mesh_folder}/canonical.glb')
        if self.config.save_landmarks:
            lmks = lmks.detach().squeeze().cpu().numpy()
            # NOTE(review): np.save writes .npy data; the '.glb' suffix here looks like a typo -- confirm
            np.save(f'{self.mesh_folder}/ibug68_{frame_id}.glb', lmks)
            selction_indx = np.array([36, 39, 42, 45, 33, 48, 54])
            np.save(f'{self.mesh_folder}/now_{frame_id}.glb', lmks[selction_indx])
    def get_heatmap(self, values):
        """Convert an error tensor into a JET-colormapped RGB tensor (3, H, W) in [0, 1].

        Assumes ``tensor2im`` maps *values* to a uint8 image centered near 127
        -- TODO confirm; uint8 subtraction below wraps for pixels < 127, which
        may be relied upon to discard negative errors.
        """
        l2 = tensor2im(values)
        l2 = cv2.cvtColor(l2, cv2.COLOR_RGB2BGR)
        # shift so the neutral value (127) maps to zero error
        l2 = l2 - 127
        # clamp errors to max_err, then rescale to the full uint8 range
        max_err = 25
        l2[l2>max_err] = max_err
        l2 = ((l2 / max_err)*255).astype(np.uint8)
        heatmap = cv2.applyColorMap(l2, cv2.COLORMAP_JET)
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) / 255.
        # (C, H, W) float tensor for logging/visualization
        heatmap = torch.from_numpy(heatmap).permute(2, 0, 1)
        return heatmap
def to_cuda(self, batch, unsqueeze=False):
for key in batch.keys():
if torch.is_tensor(batch[key]):
batch[key] = batch[key].to(self.device)
if unsqueeze:
batch[key] = batch[key][None]
return batch
    def create_parameters(self, timestep, mica_shape):
        """Initialize all optimizable parameters: camera pose/intrinsics and the
        FLAME codes (shape, expression, jaw, neck, eyes, eyelids) for the
        keyframes (frame 0 + ``config.keyframes``).

        Args:
            timestep: camera slot to initialize in R_base/t_base/cam_pose_nvd.
            mica_shape: identity code predicted by MICA, used both as the
                initialization and as the regularization target.
        """
        bz = 1  # NOTE(review): unused local
        # camera placed 1 unit back on z, then aimed at the origin (y-up)
        pose_mat = np.eye(4)
        pose_mat[2, 3] = -1
        opencv_w2c_pose = Pose(pose_mat, camera_coordinate_convention=dreifus.matrix.CameraCoordinateConvention.OPEN_CV)
        opencv_w2c_pose = opencv_w2c_pose.change_pose_type(dreifus.matrix.PoseType.CAM_2_WORLD)
        opencv_w2c_pose.look_at(np.zeros(3), np.array([0, 1, 0]))
        opencv_w2c_pose = opencv_w2c_pose.change_pose_type(dreifus.matrix.PoseType.WORLD_2_CAM)
        self.debug_pose_init = opencv_w2c_pose.change_pose_type(dreifus.matrix.PoseType.WORLD_2_CAM).copy()
        # identity code from MICA; zeroed when the ablation ignores MICA
        self.shape = mica_shape.detach().clone()
        self.mica_shape = mica_shape.detach().clone()
        if self.config.ignore_mica:
            self.shape = torch.zeros_like(self.shape)
            self.mica_shape = torch.zeros_like(self.mica_shape)
        # convert the initial pose into nvdiffrast convention (OpenGL, world->cam)
        cam_pose = opencv_w2c_pose
        cam_pose = cam_pose.change_pose_type(dreifus.matrix.PoseType.CAM_2_WORLD)
        cam_pose_nvd = cam_pose.copy()
        cam_pose_nvd = cam_pose_nvd.change_camera_coordinate_convention(new_camera_coordinate_convention=dreifus.matrix.CameraCoordinateConvention.OPEN_GL)
        cam_pose_nvd = cam_pose_nvd.change_pose_type(dreifus.matrix.PoseType.WORLD_2_CAM)
        self.cam_pose_nvd[timestep] = torch.from_numpy(cam_pose_nvd.copy()).float().cuda()
        # camera extrinsics become optimizable leaf tensors
        R = torch.from_numpy(cam_pose_nvd.get_rotation_matrix()).unsqueeze(0).cuda()
        T = torch.from_numpy(cam_pose_nvd.get_translation()).unsqueeze(0).cuda()
        R.requires_grad = True
        T.requires_grad = True
        self.R_base[timestep] = R
        self.t_base[timestep] = T
        # focal length is stored normalized by image size; init ~2000 px at 512
        init_f = 2000 * self.config.size/512
        self.focal_length = torch.tensor([[init_f/self.config.size]]).float().to(self.device)
        self.principal_point = torch.tensor([[0, 0]]).float().to(self.device)
        self.focal_length.requires_grad = True
        self.principal_point.requires_grad = True
        intrinsics = torch.tensor([[init_f, 0, self.config.size//2],
                                   [0, init_f, self.config.size//2],
                                   [0, 0, 1]]).float().cuda()
        proj_512 = nvdiffrast_util.intrinsics2projection(intrinsics,
                                                         znear=0.1, zfar=10,
                                                         width=self.config.size,
                                                         height=self.config.size)
        # initial model-view-projection matrix per camera serial
        self.r_mvps = {}
        for serial in self.cam_pose_nvd.keys():
            self.r_mvps[serial] = ( proj_512 @ self.cam_pose_nvd[serial] )[None, ...]
        # FLAME codes start at neutral (zeros / identity rotations)
        n_timesteps = 1
        expression_params = np.zeros([n_timesteps, 100])
        jaw_params = np.zeros([n_timesteps, 3])
        neck_params = np.zeros([n_timesteps, 3])
        flame_R = torch.from_numpy(np.stack([np.eye(3) for _ in range(n_timesteps)], axis=0))
        flame_t = torch.from_numpy(np.stack([np.zeros([3]) for _ in range(n_timesteps)], axis=0))
        self.R = nn.Parameter(matrix_to_rotation_6d(flame_R.float().to(self.device)))
        self.t = nn.Parameter(flame_t.float().to(self.device))
        self.expression_params = expression_params
        self.jaw_params = jaw_params.astype(np.float32)
        self.neck_params = neck_params.astype(np.float32)
        # promote shape to an optimizable Parameter; mica_shape stays fixed as the reg target
        self.shape = nn.Parameter(self.mica_shape.detach().clone())
        self.texture_observation_mask = None
        # one row per keyframe (frame 0 + config.keyframes)
        self.exp = nn.Parameter(torch.from_numpy(self.expression_params[[0] + self.config.keyframes,..., :]).float().to(self.device))
        self.jaw = nn.Parameter(matrix_to_rotation_6d(euler_angles_to_matrix(torch.from_numpy(self.jaw_params[[0]+ self.config.keyframes,..., :]).cuda(), 'XYZ')))
        self.neck = nn.Parameter(matrix_to_rotation_6d(euler_angles_to_matrix(torch.from_numpy(self.neck_params[[0]+ self.config.keyframes,..., :]).cuda(), 'XYZ')))
        # both eyes start at identity rotation (6D), eyelids fully open
        self.eyes = nn.Parameter(torch.cat([matrix_to_rotation_6d(I), matrix_to_rotation_6d(I)], dim=1).repeat(1+len(self.config.keyframes), 1) )
        self.eyelids = nn.Parameter(torch.zeros(1+len(self.config.keyframes), 2).float().to(self.device))
def parse_mask(self, ops, batch, visualization=False):
result = ops['mask_images_rendering']
if visualization:
result = ops['mask_images']
return result.detach()
def clone_params_keyframes_all(self, freeze_id : bool = False, is_joint : bool = False, freeze_cam : bool = False,
include_neck : bool = False):
lr_scale = 1.0
lr_scale_id_related = 1.0
if freeze_id:
lr_scale_id_related = 0.1
params = [
{'params': [self.exp], 'lr': self.config.lr_exp * lr_scale, 'name': ['exp']}, # 0.025
{'params': [self.eyes], 'lr': 0.005 * lr_scale, 'name': ['eyes']},
# {'params': [self.eyelids.clone())], 'lr': 0.001, 'name': ['eyelids']},
{'params': [self.eyelids], 'lr': 0.002 * lr_scale, 'name': ['eyelids']},
# {'params': [self.sh.clone())], 'lr': 0.01, 'name': ['sh']},
{'params': [self.t], 'lr': self.config.lr_t * lr_scale, 'name': ['t']},
#{'params': [self.t.clone())], 'lr': 0.005 * lr_scale, 'name': ['t']},
{'params': [self.R], 'lr': self.config.lr_R * lr_scale, 'name': ['R']},
#{'params': [self.R.clone())], 'lr': 0.003 * lr_scale, 'name': ['R']},
# {'params': [self.tex.clone())], 'lr': 0.001, 'name': ['tex']},
# {'params': [self.principal_point.clone())], 'lr': 0.001, 'name': ['principal_point']},
# {'params': [self.focal_length.clone())], 'lr': 0.001, 'name': ['focal_length']}
]
#params.append({'params': [self.shape.clone())], 'lr': self.config.lr_id * lr_scale, 'name': ['shape']})
if not freeze_id:
if is_joint:
params.append({'params': [self.shape], 'lr': self.config.lr_id * lr_scale * 1, 'name': ['shape']})
else:
params.append({'params': [self.shape], 'lr': self.config.lr_id * lr_scale, 'name': ['shape']})
#params.append({'params': [self.shape], 'lr': 0.0, 'name': ['shape']})
params.append({'params': [self.jaw], 'lr': self.config.lr_jaw * lr_scale, 'name': ['jaw']})
if include_neck:
params.append({'params': [self.neck], 'lr': self.config.lr_neck, 'name': ['neck']})
# params.append({'params': [self.t], 'lr': 0.001, 'name': ['translation']})
# params.append({'params': [self.R], 'lr': 0.005, 'name': ['rotation']})
# params.append({'params': [self.focal_length, self.principal_point], 'lr': 0.01*lr_scale, 'name': ['camera_params']})
#if not self.config.load_intr:
if not freeze_cam:
params.append({'params': [self.focal_length], 'lr': self.config.lr_f * lr_scale_id_related, 'name': ['camera_params']})
params.append({'params': [self.principal_point], 'lr': self.config.lr_pp * lr_scale_id_related, 'name': ['camera_params']})
return params
def clone_params_keyframes_all_joint(self, freeze_id : bool = False, is_joint : bool = False,
include_neck : bool = False):
lr_scale = 1.0
lr_scale_id_related = 1.0
if freeze_id:
lr_scale_id_related = 0.1
params = [
{'params': self.exp.parameters(), 'lr': self.config.lr_exp * lr_scale, 'name': ['exp']}, # 0.025
{'params': self.eyes.parameters(), 'lr': 0.005 * lr_scale, 'name': ['eyes']},
{'params': self.eyelids.parameters(), 'lr': 0.002 * lr_scale, 'name': ['eyelids']},
{'params': self.t.parameters(), 'lr': self.config.lr_t * lr_scale, 'name': ['t']},
{'params': self.R.parameters(), 'lr': self.config.lr_R * lr_scale, 'name': ['R']},
]
params.append({'params': self.jaw.parameters(), 'lr': self.config.lr_jaw * lr_scale, 'name': ['jaw']})
if include_neck:
params.append({'params': self.neck.parameters(), 'lr': self.config.lr_neck, 'name': ['jaw']})
if not self.config.global_camera:
params.append({'params': self.focal_length.parameters(), 'lr': self.config.lr_f * lr_scale_id_related,
'name': ['camera_params']})
params.append({'params': self.principal_point.parameters(), 'lr': self.config.lr_pp * lr_scale_id_related,
'name': ['camera_params']})
#params.append({'params': [self.shape], 'lr': self.config.lr_id * lr_scale * 1, 'name': ['shape']})
return params
def reduce_loss(self, losses):
all_loss = 0.
for key in losses.keys():
all_loss = all_loss + losses[key]
losses['all_loss'] = all_loss
return all_loss
    def optimize_camera(self, batch, steps=2000, is_first_frame : bool = False
                        ):
        """Optimize rigid head pose (R, t) -- and, on the first frame, camera
        intrinsics -- with Adam: landmark loss for the first half of the
        schedule, dense UV-correspondence loss for the second half.

        Args:
            batch: preprocessed per-frame data (images, landmarks, uv map/mask, ...).
            steps: number of optimization iterations.
            is_first_frame: also optimize focal length / principal point.
        """
        batch = self.to_cuda(batch)
        images, landmarks, lmk_mask = self.parse_landmarks(batch)
        h, w = images.shape[2:4]
        num_keyframes = 1
        uv_mask = batch["uv_mask"]
        uv_map = batch["uv_map"] if "uv_map" in batch else None
        if uv_map is not None:
            # zero out UV predictions outside the valid mask
            uv_map[(1 - uv_mask[:, :, :, :]).bool()] = 0
        self.focal_length.requires_grad = True
        self.principal_point.requires_grad = True
        lr_mult = 1.0
        params = [
            {'params': [self.t], 'lr': lr_mult*0.001},
            {'params': [self.R], 'lr': lr_mult*0.005},
        ]
        if is_first_frame:
            params.append({'params': [self.focal_length], 'lr': 0.02})
            params.append({'params': [self.principal_point], 'lr': 0.0001})
        optimizer = torch.optim.Adam(params)
        # drop learning rates 10x for the final quarter of the schedule
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(steps*0.75),
                                                    gamma=0.1)
        t = tqdm(range(steps), desc='', leave=True, miniters=100)
        num_views = 1
        bs = 1
        for k in t:
            vertices_can, lmk68, lmkMP, vertices_can_can, vertices_noneck = self.flame(cameras=torch.inverse(self.R_base[0]),
                                                                                       shape_params=self.shape if self.shape.shape[0] == bs else self.shape.repeat(bs, 1),
                                                                                       expression_params=self.exp.repeat_interleave(num_views, dim=0),
                                                                                       eye_pose_params=self.eyes.repeat_interleave(num_views, dim=0),
                                                                                       jaw_pose_params=self.jaw.repeat_interleave(num_views, dim=0),
                                                                                       neck_pose_params=self.neck.repeat_interleave(num_views, dim=0),
                                                                                       rot_params_lmk_shift=(matrix_to_rotation_6d(torch.inverse(rotation_6d_to_matrix(self.R)))).repeat_interleave(num_views, dim=0),
                                                                                       )
            # apply rigid pose to landmarks and vertices, then project to screen space
            lmk68 = torch.einsum('bny,bxy->bnx', lmk68,
                                 rotation_6d_to_matrix(self.R.repeat_interleave(num_views, dim=0))) + self.t.repeat_interleave(num_views, dim=0).unsqueeze(1)
            verts = torch.einsum('bny,bxy->bnx', vertices_can,
                                 rotation_6d_to_matrix(
                                     self.R.repeat_interleave(num_views, dim=0))) + self.t.repeat_interleave(num_views,
                                                                                                             dim=0).unsqueeze(
                1)
            lmk68_screen_space = project_points_screen_space(lmk68, self.focal_length, self.principal_point, self.R_base, self.t_base, size=self.config.size)
            verts_screen_space = project_points_screen_space(verts, self.focal_length, self.principal_point, self.R_base, self.t_base, size=self.config.size)
            losses = {}
            losses['pp_reg'] = torch.sum(self.principal_point ** 2)
            # phase 1 (k <= steps/2): sparse landmark alignment
            if k <= steps // 2:
                losses['lmk68'] = util.lmk_loss(lmk68_screen_space[..., :2], landmarks[..., :2], [h, w], lmk_mask) * 3000
            if k == 0:
                # cache UV->vertex correspondences once for the UV loss
                self.uv_loss_fn.compute_corresp(uv_map)
            # phase 2 (k > steps/2): dense UV correspondence
            if k > steps // 2:
                uv_loss = self.uv_loss_fn.compute_loss(verts_screen_space)
                losses['uv_loss'] = uv_loss * 1000
            all_loss = 0.
            for key in losses.keys():
                all_loss = all_loss + losses[key]
            losses['all_loss'] = all_loss
            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            # keep cached MVP matrices in sync with the current intrinsics/extrinsics
            intrinsics = get_intrinsics(self.focal_length, self.principal_point, use_hack=False, size=self.config.size)
            proj_512 = nvdiffrast_util.intrinsics2projection(intrinsics[0],
                                                             znear=0.1, zfar=5,
                                                             width=self.config.size,
                                                             height=self.config.size)
            for serial in self.cam_pose_nvd.keys():
                extr = get_extrinsics(self.R_base[serial], self.t_base[serial])
                r_mvps = proj_512 @ extr
                self.r_mvps[serial] = r_mvps
            loss = all_loss.item()
            t.set_description(f'Loss for camera {loss:.4f}')
            self.frame += 1
        self.frame = 0
    @torch.compiler.disable
    def get_vars(self, is_joint, selected_frames):
        """Fetch the current optimization variables.

        In per-frame mode the attributes are plain tensors/Parameters and are
        returned directly; in joint mode they are per-frame modules and are
        called with the selected frame indices. Excluded from torch.compile
        because of the data-dependent branching.

        Returns:
            (exp, eyes, eyelids, R, t, jaw, neck, focal_length, principal_point)
        """
        if not is_joint:
            exp = self.exp
            eyes = self.eyes
            eyelids = self.eyelids
            _R = self.R
            _t = self.t
            jaw = self.jaw
            neck = self.neck
            focal_length = self.focal_length
            principal_point = self.principal_point
        else:
            selected_frames = torch.from_numpy(selected_frames).long().cuda()
            exp = self.exp(selected_frames)
            eyes = self.eyes(selected_frames)
            eyelids = self.eyelids(selected_frames)
            _R = self.R(selected_frames)
            _t = self.t(selected_frames)
            jaw = self.jaw(selected_frames)
            neck = self.neck(selected_frames)
            # a single global camera is shared across frames; otherwise query per frame
            if not self.config.global_camera:
                focal_length = self.focal_length(selected_frames)
                principal_point = self.principal_point(selected_frames)
            else:
                focal_length = self.focal_length
                principal_point = self.principal_point
        return exp, eyes, eyelids, _R, _t, jaw, neck, focal_length, principal_point
    @torch.compiler.disable
    def data_stuff(self, is_joint, iters, p, image_lmks68, lmk_mask, normal_map, normal_mask, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris):
        """In joint mode, sample a batch of frames and pull their cached data,
        overriding the passed-in per-frame tensors; otherwise pass inputs through.

        Sampling alternates between random frame subsets (early iterations, or
        when smoothing is off) and contiguous windows of BATCH_SIZE frames
        (needed for the temporal smoothness losses).

        Returns:
            (selected_frames, batch, bs, num_views, image_lmks68, lmk_mask,
             normal_map, normal_mask, uv_map, uv_mask, left_iris, right_iris,
             mask_left_iris, mask_right_iris)
        """
        if is_joint:
            with torch.no_grad():
                if (p < int(iters * 0.15) and (p % 2 == 0)) or not self.config.smooth:
                    # random subset of frames (no temporal ordering needed)
                    all_frames = np.array(
                        range(self.config.start_frame, self.MAX_STEPS + self.config.start_frame, self.FRAME_SKIP))
                    selected_frames = np.sort(np.random.choice(np.arange(len(all_frames)), size=self.BATCH_SIZE,
                                                               replace=False))
                else:
                    # contiguous window centered on a random frame, clamped to the
                    # valid range, so neighboring rows are consecutive frames
                    all_frames = np.array(
                        range(self.config.start_frame, self.MAX_STEPS + self.config.start_frame, self.FRAME_SKIP))
                    start = np.min(all_frames)
                    end = np.max(all_frames)
                    rnd_start = np.random.randint(start, end)
                    assert (end - start) >= self.BATCH_SIZE + 1
                    assert self.BATCH_SIZE % 2 == 0
                    if rnd_start - self.BATCH_SIZE // 2 < 0:
                        rnd_start = self.BATCH_SIZE // 2
                    if rnd_start + self.BATCH_SIZE // 2 + 1 > end:
                        rnd_start = end - self.BATCH_SIZE // 2 + 1
                    selected_frames = np.array(
                        list(range(rnd_start - self.BATCH_SIZE // 2, rnd_start + self.BATCH_SIZE // 2)))
                # gather the cached per-frame tensors for the sampled indices
                selected_frames_th = torch.from_numpy(selected_frames).long()
                batch = {k: self.cached_data[k][selected_frames_th, ...] for k in self.cached_data.keys()}
                images, landmarks, lmk_mask = self.parse_landmarks(batch)
                uv_mask = batch["uv_mask"]
                normal_mask = batch["normal_mask"]
                normal_map = batch["normals"] if "normals" in batch else None
                uv_map = batch["uv_map"] if "uv_map" in batch else None
                #TODO check if this was important in any way
                if uv_map is not None:
                    uv_map[(1 - uv_mask[:, :, :, :]).bool()] = 0
                num_views = len(self.R_base.keys())
                bs = batch['normals'].shape[0] * num_views
                image_lmks68 = landmarks
                if landmarks is not None:
                    left_iris = batch['left_iris']
                    right_iris = batch['right_iris']
                    mask_left_iris = batch['mask_left_iris']
                    mask_right_iris = batch['mask_right_iris']
        else:
            # per-frame mode: caller-supplied tensors are used unchanged
            selected_frames = None
            bs = 1
            num_views = 1
            batch = None
        return selected_frames, batch, bs, num_views, image_lmks68, lmk_mask, normal_map, normal_mask, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris
    # TODO: could be improved by compiling all of the actual smooth-loss computation
    # @torch.compile
def actual_smooth(self, variables, losses):
reg_smooth_exp = (variables['exp'][:-1, :] - variables['exp'][1:, :]).square().mean()
reg_smooth_eyes = (variables['eyes'][:-1, :] - variables['eyes'][1:, :]).square().mean()
reg_smooth_eyelids = (variables['eyelids'][:-1, :] - variables['eyelids'][1:, :]).square().mean()
reg_smooth_R = (variables['R'][:-1, :] - variables['R'][1:, :]).square().mean()
reg_smooth_t = (variables['t'][:-1, :] - variables['t'][1:, :]).square().mean()
reg_smooth_jaw = (variables['jaw'][:-1, :] - variables['jaw'][1:, :]).square().mean()
reg_smooth_neck = (variables['neck'][:-1, :] - variables['neck'][1:, :]).square().mean()
if not self.config.global_camera:
reg_smooth_principal_point = (
variables['principal_point'][:-1, :] - variables['principal_point'][1:, :]).square().mean()
reg_smooth_focal_length = (
variables['focal_length'][:-1, :] - variables['focal_length'][1:, :]).square().mean()
else:
reg_smooth_principal_point = torch.zeros_like(reg_smooth_jaw)
reg_smooth_focal_length = torch.zeros_like(reg_smooth_jaw)
losses['smooth/exp'] = reg_smooth_exp * self.config.reg_smooth_exp * self.config.reg_smooth_mult
losses['smooth/eyes'] = reg_smooth_eyes * self.config.reg_smooth_eyes * self.config.reg_smooth_mult
losses['smooth/eyelids'] = reg_smooth_eyelids * self.config.reg_smooth_eyelids * self.config.reg_smooth_mult
losses['smooth/jaw'] = reg_smooth_jaw * self.config.reg_smooth_jaw * self.config.reg_smooth_mult
losses['smooth/neck'] = reg_smooth_neck * self.config.reg_smooth_neck * self.config.reg_smooth_mult
losses['smooth/R'] = reg_smooth_R * self.config.reg_smooth_R * self.config.reg_smooth_mult
losses['smooth/t'] = reg_smooth_t * self.config.reg_smooth_t * self.config.reg_smooth_mult
losses['smooth/principal_point'] = reg_smooth_principal_point * self.config.reg_smooth_pp * self.config.reg_smooth_mult
losses['smooth/focal_length'] = reg_smooth_focal_length * self.config.reg_smooth_fl * self.config.reg_smooth_mult
return losses
@torch.compiler.disable
def add_smooth_loss(self, losses, is_joint, p, iters, variables):
if is_joint and self.config.smooth and ((p >= int(iters * 0.15) and (p % 2 == 1)) ): # and p % 2 != 0 and False:
losses = self.actual_smooth(variables, losses)
return losses
def opt_pre(self, is_joint, iters, p, no_lm, image_lmks68, lmk_mask, normal_mask, normal_map, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris):
image_size = [self.config.size, self.config.size]
selected_frames, batch, bs, num_views, image_lmks68, lmk_mask, normal_map, normal_mask, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris = self.data_stuff(is_joint, iters, p, image_lmks68, lmk_mask, normal_map, normal_mask, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris)
self.diff_renderer.reset()
losses = {}
exp, eyes, eyelids, _R, _t, jaw, neck, focal_length, principal_point = self.get_vars(is_joint, selected_frames)
variables = {
'exp': exp,
'eyes': eyes,
'eyelids': eyelids,
'R': _R,
't': _t,
'jaw': jaw,
'neck': neck,
'principal_point': principal_point,
'focal_lenght': focal_length,
}
intrinsics = get_intrinsics(focal_length, principal_point, use_hack=False, size=self.config.size)
proj_512 = nvdiffrast_util.intrinsics2projection(intrinsics,
znear=0.1, zfar=5,
width=self.config.size,
height=self.config.size)
for serial in self.cam_pose_nvd.keys():
extr = get_extrinsics(self.R_base[serial], self.t_base[serial])
r_mvps = torch.matmul(proj_512, extr.repeat(bs, 1, 1))
self.r_mvps[serial] = r_mvps
vertices_can, lmk68, lmkMP, vertices_can_can, vertices_noneck = self.flame(
cameras=torch.inverse(self.R_base[0]).repeat(bs, 1, 1),
shape_params=self.shape if self.shape.shape[0] == bs else self.shape.repeat(bs, 1).cuda(),
expression_params=exp.repeat_interleave(num_views, dim=0), # .repeat(bs, 1),
eye_pose_params=eyes.repeat_interleave(num_views, dim=0), # .repeat(bs, 1),
jaw_pose_params=jaw.repeat_interleave(num_views, dim=0), # .repeat(bs, 1),
neck_pose_params=neck.repeat_interleave(num_views, dim=0), # .repeat(bs, 1),
eyelid_params=eyelids.repeat_interleave(num_views, dim=0), # .repeat(bs, 1),
rot_params_lmk_shift=(matrix_to_rotation_6d(torch.inverse(rotation_6d_to_matrix(_R)))).repeat_interleave(
num_views, dim=0), # .repeat(bs, 1)
)
verts_can_can_mirrored = vertices_can_can[:, self.mirror_order, :]
vertices_can_can_mirrored = torch.zeros_like(verts_can_can_mirrored)
vertices_can_can_mirrored[:, :, 0] = -verts_can_can_mirrored[:, :, 0]
vertices_can_can_mirrored[:, :, 1:] = verts_can_can_mirrored[:, :, 1:]
mirror_loss = (vertices_can_can_mirrored - vertices_can_can).square().sum(-1)
mirror_loss = mirror_loss.mean()
lmk68 = torch.einsum('bny,bxy->bnx', lmk68,
rotation_6d_to_matrix(_R.repeat_interleave(num_views, dim=0))) + _t.repeat_interleave(
num_views, dim=0).unsqueeze(1)
vertices = torch.einsum('bny,bxy->bnx', vertices_can,
rotation_6d_to_matrix(_R.repeat_interleave(num_views, dim=0))) + _t.repeat_interleave(
num_views, dim=0).unsqueeze(1)
vertices_noneck = torch.einsum('bny,bxy->bnx', vertices_noneck,
rotation_6d_to_matrix(_R.repeat_interleave(num_views, dim=0))) + _t.repeat_interleave(
num_views, dim=0).unsqueeze(1)
proj_lmks68 = project_points_screen_space(lmk68, focal_length, principal_point, self.R_base, self.t_base,
size=self.config.size)
proj_vertices = project_points_screen_space(vertices, focal_length, principal_point, self.R_base, self.t_base,
size=self.config.size)
right_eye, left_eye = eyes[:, :6], eyes[:, 6:]
# landmark loss
if not no_lm:
lmk_scale = 1.0 # 0.0001
# Landmarks sparse term
# losses[('loss/lmk_oval')] = util.oval_lmk_loss(proj_lmks68[..., :2], image_lmks68, image_size, lmk_mask) * self.config.w_lmks_oval * lmk_scale
# losses['loss/lmk_68'] = util.lmk_loss(proj_lmks68[:, 17:, :2], image_lmks68[:, 17:, :], image_size, lmk_mask[:, 17:, :]) * self.config.w_lmks * lmk_scale
# if self.config.use_eyebrows:
# losses['loss/lmk_eyebrows'] = util.lmk_loss(proj_lmks68[:, 17:27, :2], image_lmks68[:, 17:27, :], image_size, lmk_mask[:, 17:27, :]) * self.config.w_lmks * lmk_scale * 5.0
losses['loss/lmk_eye2'] = util.lmk_loss(proj_lmks68[:, 36:48, :2], image_lmks68[:, 36:48, :], image_size,
lmk_mask[:, 36:48,
:]) * self.config.w_lmks * lmk_scale * 5 #10 # 0 #2.0 #0.5 #0.0 #100
if self.config.use_mouth_lmk:
losses['loss/lmk_mouth'] = util.lmk_loss(proj_lmks68[:, 48:68, :2], image_lmks68[:, 48:68, :],
image_size,
lmk_mask[:, 48:68, :]) * self.config.w_lmks_mouth * lmk_scale * 0.25
losses['loss/lmk_mouth_closure'] = util.mouth_closure_lmk_loss(proj_lmks68[..., :2], image_lmks68,
image_size,
lmk_mask) * self.config.w_lmks_mouth * lmk_scale * 2.5
losses['loss/lmk_eye'] = util.eye_closure_lmk_loss(proj_lmks68[..., :2], image_lmks68, image_size,
lmk_mask) * self.config.w_lmks_lid * lmk_scale * 500 # 0 #500 #0.0 #10
losses['loss/lmk_iris_left'] = util.lmk_loss(proj_vertices[:, left_iris_flame[:1], ..., :2], left_iris,
image_size,
mask_left_iris) * self.config.w_lmks_iris * lmk_scale * 50.00
losses['loss/lmk_iris_right'] = util.lmk_loss(proj_vertices[:, right_iris_flame[:1], ..., :2], right_iris,
image_size,
mask_right_iris) * self.config.w_lmks_iris * lmk_scale * 50.0
# Reguralizers
losses['reg/exp'] = torch.sum(exp ** 2, dim=-1).mean() * self.config.w_exp
losses['reg/sym'] = torch.sum((right_eye - left_eye) ** 2, dim=-1).mean() * 0.1 # 8.0 #*5.0
losses['reg/jaw'] = torch.sum((I6D - jaw) ** 2, dim=-1).mean() * self.config.w_jaw
losses['reg/neck'] = torch.sum((I6D - neck) ** 2, dim=-1).mean() * self.config.w_neck
# losses['reg/eye_lids'] = torch.sum((eyelids[:, 0] - eyelids[:, 1]) ** 2, dim=-1).mean() * 0.1
losses['reg/eye_left'] = torch.sum((I6D - left_eye) ** 2, dim=-1).mean() * 0.01
losses['reg/eye_right'] = torch.sum((I6D - right_eye) ** 2, dim=-1).mean() * 0.01
losses['reg/shape'] = torch.sum((self.shape - self.mica_shape) ** 2, dim=-1).mean() * self.config.w_shape
losses['reg/shape_general'] = torch.sum((self.shape) ** 2, dim=-1).mean() * self.config.w_shape_general
losses['reg/mirror'] = mirror_loss * 5000
if not (self.config.n_fine and p >= iters // 2):
losses['reg/pp'] = torch.sum(principal_point ** 2, dim=-1).mean()
return batch, losses, vertices, vertices_noneck, vertices_can, vertices_can_can, proj_vertices, proj_lmks68, selected_frames, variables, num_views, normal_mask, normal_map, uv_map, uv_mask
def opt_post(self, variables, ops, proj_vertices, proj_lmks68, batch, is_joint, is_first_step, losses, uv_map, selected_frames, p, iters, num_views, normal_mask, normal_map):
    """Add the dense image-space terms (silhouette, UV, normals) to ``losses``
    and reduce everything to a single scalar via ``self.reduce_loss``.

    Args:
        variables: dict of current optimization variables; only ``"R"`` (6D
            head rotation) is read here.
        ops: renderer outputs from ``self.diff_renderer`` (depth, fg / uv /
            normal images and masks).
        proj_vertices: projected FLAME vertices in screen space; channel 2 is
            assumed to hold depth -- TODO confirm against the projection helper.
        proj_lmks68: projected 68 landmarks (unused here; kept for interface
            symmetry with ``opt_pre``).
        batch: input data dict (``fg_mask``, ``valid_bg``, ``uv_mask`` ...).
        is_joint / is_first_step: stage flags; they modulate the silhouette term.
        losses: dict of already-computed landmark/regularizer losses, extended
            in place.
        uv_map: predicted UV map tensor or None.
        selected_frames: frame indices for the joint stage (forwarded to the
            UV loss object).
        p / iters: current iteration index and total iteration count.
        num_views: number of camera views.
        normal_mask / normal_map: supervision mask and target normals (or None).

    Returns:
        Scalar total loss.
    """
    # Sample the rendered depth map at each projected vertex position to build
    # a visibility mask: a vertex counts as visible when its projected depth is
    # not (noticeably) behind the rendered surface.
    grabbed_depth = ops['actual_rendered_depth'][:, 0,
                    torch.clamp(proj_vertices[:, :, 1].long(), 0,
                                self.config.size - 1),
                    torch.clamp(proj_vertices[:, :, 0].long(), 0,
                                self.config.size - 1),
                    ][:, 0, :]
    is_visible_verts_idx = grabbed_depth < (proj_vertices[:, :, 2] + 1e-2)
    if not self.config.occ_filter:
        # Occlusion filtering disabled: treat every vertex as visible.
        is_visible_verts_idx = torch.ones_like(is_visible_verts_idx)
    valid_bg_classes = batch['valid_bg']  # bg-class or neck-class
    if self.config.sil_super > 0:
        if is_joint or (not is_first_step):  # and p > 50 and p < int(iters*0.85): # 100
            # losses['loss/sil'] =((1-upper_forehead[:, None, :, :]) * (batch['fg_mask'] - ops['fg_images'])).abs().mean() * self.config.sil_super#0
            losses['loss/sil'] = ((valid_bg_classes[:, None, :, :]) * (
                    batch['fg_mask'] - ops['fg_images'])).abs().mean() * self.config.sil_super  # 0
        else:
            # Very first frame of the online stage: damp the silhouette term 10x.
            losses['loss/sil'] = ((valid_bg_classes[:, None, :, :]) * (
                    batch['fg_mask'] - ops['fg_images'])).abs().mean() * self.config.sil_super / 10  # 0
    if self.config.uv_map_super:  # and p > iters // 2:
        # Dense per-pixel UV supervision: rendered UV image vs. predicted UV map,
        # restricted to the UV mask.
        gt_uv = uv_map[:, :2, :, :].permute(0, 2, 3, 1)
        if self.config.uv_l2:
            uv_loss = ((gt_uv - ops['uv_images']) * batch["uv_mask"][:, 0, ...].unsqueeze(-1)).square().mean() * 100
        else:
            uv_loss = ((gt_uv - ops['uv_images']) * batch["uv_mask"][:, 0, ...].unsqueeze(-1)).abs().mean()
        # TODO: outlier filtering!!!
        losses['loss/uv_pixel'] = uv_loss * self.config.uv_map_super
    if self.config.uv_map_super > 0.0:  # and (p < iters // 2 or self.config.keep_uv) and not self.config.no2d_verts:
        # uv_loss = get_uv_loss(uv_map, proj_vertices)
        # Sparse vertex-to-pixel UV correspondence loss; the correspondences are
        # computed lazily once and cached inside the loss object.
        if self.uv_loss_fn.gt_2_verts is None:
            self.uv_loss_fn.compute_corresp(uv_map, selected_frames=selected_frames)
        uv_loss = self.uv_loss_fn.compute_loss(proj_vertices, selected_frames=selected_frames, uv_map=uv_map,
                                               l2_loss=self.config.uv_l2, is_visible_verts_idx=is_visible_verts_idx)
        losses['loss/uv'] = uv_loss * self.config.uv_map_super  # 000
    skip_normals = False
    if self.config.n_fine and p < iters // 2:
        # "Fine" schedule: normals only supervise the second half of the iterations.
        skip_normals = True
    if (self.config.normal_super > 0.0 or self.config.normal_super_can > 0.0) and not skip_normals:
        # normal_loss_map = normal_loss_map * dilated_eye_mask[:, 0, ...] * (1 - ops['mask_images_eyes_region'][:, 0, ...])
        # use dilated eye mask only
        # maybe also applie eyemask in image not rendering
        # Grow the rendered eye mask via gaussian blur and exclude that region
        # from the normal loss.
        dilated_eye_mask = 1 - (gaussian_blur(ops['mask_images_eyes'],
                                              [self.config.normal_mask_ksize, self.config.normal_mask_ksize],
                                              sigma=[self.config.normal_mask_ksize,
                                                     self.config.normal_mask_ksize]) > 0).float()
        pred_normals = ops['normal_images']  # 1 3 512 512 normals in world space
        rot_mat = rotation_6d_to_matrix(variables["R"].repeat_interleave(num_views, dim=0))  # 1 3 3
        # Rotate world-space normals into FLAME (head-canonical) space.
        pred_normals_flame_space = torch.einsum('bxy,bxhw->byhw', rot_mat, pred_normals)
        if normal_map is not None:
            l_map = (normal_map - pred_normals_flame_space)
            # Robustness: drop pixels whose mean per-channel error exceeds delta_n.
            valid = ((l_map.abs().sum(dim=1) / 3) < self.config.delta_n).unsqueeze(1)
            normal_loss_map = l_map * valid.float() * normal_mask * dilated_eye_mask
            if self.config.normal_l2:
                losses['loss/normal'] = normal_loss_map.square().mean() * self.config.normal_super
            else:
                losses['loss/normal'] = normal_loss_map.abs().mean() * self.config.normal_super
        else:
            losses['loss/normal'] = 0.0
    # smoothness loss
    losses = self.add_smooth_loss(losses, is_joint, p, iters, variables)
    all_loss = self.reduce_loss(losses)
    return all_loss
def optimize_color(self, batch, params_func,
                   no_lm : bool = False,
                   save_timestep=0,
                   is_joint : bool = False,
                   is_first_step : bool = False,
                   ):
    """Main energy-minimization loop over landmark, silhouette, UV and normal terms.

    Runs ``self.config.iters`` optimizer steps. Per-iteration data assembly is
    delegated to ``self.opt_pre`` and loss computation to ``self.opt_post``.

    Args:
        batch: per-frame data dict from ``read_data`` (rebuilt by ``opt_pre``
            when ``is_joint``; the joint stage passes None).
        params_func: zero-arg callable returning optimizer param groups.
        no_lm: disable landmark supervision (forwarded to ``opt_pre``).
        save_timestep: timestep label, used only in the progress-bar text.
        is_joint: True for the global (all-frames) stage; per-frame params are
            sparse embeddings optimized with SparseAdam, while identity (and
            optionally camera) params get a second dense Adam, plus a staged
            LR decay schedule.
        is_first_step: True on the very first frame of the online stage;
            disables early stopping (and weakens the silhouette term in
            ``opt_post``).
    """
    iters = self.config.iters
    if not is_joint:
        images, landmarks, lmk_mask = self.parse_landmarks(batch)
        uv_mask = batch["uv_mask"]
        normal_mask = batch["normal_mask"]
        normal_map = batch["normals"] if "normals" in batch else None
        uv_map = batch["uv_map"] if "uv_map" in batch else None
        if uv_map is not None:
            # Zero out UV predictions outside the UV mask (in place).
            uv_map[(1-uv_mask[:, :, :, :]).bool()] = 0
    # Optimizer per step
    if is_joint:
        # Sparse per-frame parameters -> SparseAdam; shared identity / camera
        # parameters -> dense Adam.
        optimizer = torch.optim.SparseAdam(params_func())
        params_global = [
            {'params': [self.shape], 'lr': self.config.lr_id * 1.0, 'name': ['shape']}
        ]
        if self.config.global_camera:
            params_global.append({'params': [self.focal_length], 'lr': self.config.lr_f * 1.0,
                                  'name': ['camera_params']})
            params_global.append({'params': [self.principal_point], 'lr': self.config.lr_pp * 1.0,
                                  'name': ['camera_params']})
        optimizer_id = torch.optim.Adam(params_global)
        optimizer_id.zero_grad()
    else:
        optimizer = torch.optim.Adam(params_func())
    optimizer.zero_grad()
    if not is_joint:
        num_views = len(self.R_base.keys())
        bs = batch['normals'].shape[0] * num_views
        image_lmks68 = landmarks
        if landmarks is not None:
            left_iris = batch['left_iris']
            right_iris = batch['right_iris']
            mask_left_iris = batch['mask_left_iris']
            mask_right_iris = batch['mask_right_iris']
    else:
        # Joint stage: opt_pre assembles the supervision targets itself.
        image_lmks68 = None
        lmk_mask, normal_mask, normal_map, uv_map, uv_mask = None, None, None, None, None
        left_iris, right_iris, mask_left_iris, mask_right_iris = None, None, None, None
    self.diff_renderer.reset()
    # Early-stopping bookkeeping: ring buffer of per-step loss changes.
    best_loss = np.inf
    n_steps_stagnant = 0
    stagnant_window_size = 10
    past_k_steps = np.array([100.0 for _ in range(stagnant_window_size)])
    iterator = tqdm(range(iters), desc='', leave=True, miniters=100)
    for p in iterator:
        # Staged LR decay (joint stage only) at 50% / 75% / 90% of the schedule;
        # translation / rotation / jaw groups decay at different rates than the rest.
        if is_joint and p == int(iters*0.5):
            for pgroup in optimizer.param_groups:
                if pgroup['name'] in ['t', 'R', 'jaw']:
                    pgroup['lr'] = pgroup['lr'] / 10
                    print(f'LR Reduce at iter {p}, for pgroup {pgroup["name"]}')
                else:
                    pgroup['lr'] = pgroup['lr'] / 2
        if is_joint and p == int(iters *0.75):
            for pgroup in optimizer.param_groups:
                if pgroup['name'] in ['t', 'R', 'jaw']:
                    pgroup['lr'] = pgroup['lr'] / 5
                    print(f'LR Reduce at iter {p}, for pgroup {pgroup["name"]}')
                else:
                    pgroup['lr'] = pgroup['lr'] / 2
        if is_joint and p == int(iters *0.9):
            for pgroup in optimizer.param_groups:
                if pgroup['name'] in ['t', 'R', 'jaw']:
                    pgroup['lr'] = pgroup['lr'] / 2
                    print(f'LR Reduce at iter {p}, for pgroup {pgroup["name"]}')
                else:
                    pgroup['lr'] = pgroup['lr'] / 5
        batch_joint, losses, vertices, vertices_noneck, vertices_can, vertices_can_can, proj_vertices, proj_lmks68, selected_frames, variables, num_views, normal_mask, normal_map, uv_map, uv_mask = self.opt_pre(is_joint, iters, p, no_lm, image_lmks68, lmk_mask, normal_mask, normal_map, uv_map, uv_mask, left_iris, right_iris, mask_left_iris, mask_right_iris)
        if is_joint:
            batch = batch_joint
        timestep = 0
        ops = self.diff_renderer(vertices, None, None,
                                 self.r_mvps[timestep], self.R_base[timestep], self.t_base[timestep],
                                 texture_observation_mask=self.texture_observation_mask,
                                 verts_can=vertices_can,
                                 verts_noneck=vertices_noneck,
                                 verts_can_can=vertices_can_can,
                                 verts_depth=proj_vertices[:, :, 2:3],
                                 )
        all_loss = self.opt_post(variables, ops, proj_vertices, proj_lmks68, batch, is_joint, is_first_step, losses, uv_map, selected_frames, p, iters, num_views, normal_mask, normal_map)
        #vertices.retain_grad()
        #if not self.init_done:
        all_loss.backward()#retain_graph=True)
        optimizer.step()
        optimizer.zero_grad()
        if is_joint:
            optimizer_id.step()
            optimizer_id.zero_grad()
        #if p == 0 or p == iters-1:
        #if p == iters-1:# and not self.config.low_overhead and False:
        #wandb.log(losses)
        self.global_step += 1
        loss_color = all_loss.item()
        if loss_color < best_loss - 1.0:
            best_loss = loss_color
            n_steps_stagnant = 0
        elif p > 25:  # only start counting after n steps
            n_steps_stagnant += 1
        if p > 0:
            # Absolute per-step loss change, written into the ring buffer.
            past_k_steps[p%stagnant_window_size] = np.abs(all_loss.item() - prev_loss)
        prev_loss = all_loss.item()
        if (self.frame % 99 == 0 or p < 10) and is_joint:
            pass
            #with torch.no_grad():
            #    intrinsics = get_intrinsics(focal_length, principal_point, use_hack=False)
            #proj_512 = nvdiffrast_util.intrinsics2projection(intrinsics,
            #                                                 znear=0.1, zfar=5,
            #                                                 width=512,
            #                                                 height=512)
            #for serial in self.cam_pose_nvd.keys():
            #    extr = get_extrinsics(self.R_base[serial], self.t_base[serial])
            #    r_mvps = torch.matmul(proj_512, extr.repeat(bs, 1, 1))
            #    self.r_mvps[serial] = r_mvps
            #self.checkpoint(batch, visualizations=[[View.GROUND_TRUTH, View.LANDMARKS, View.SHAPE_OVERLAY]],
            #                frame_dst='/debug_joint', save=False, dump_directly=True, timestep=p, selected_frames=selected_frames, is_final=True)
        self.frame += 1
        iterator.set_description(f'Timestep {save_timestep}; Loss {all_loss.item():.4f}')
        #if n_steps_stagnant > 35 and not is_joint:
        #    print('Early Stopping, go to next frame!')
        #    #break
        if not is_joint and not is_first_step:
            # Stop early once the mean loss change over the sliding window drops
            # below the configured delta (skipped for the first frame and the
            # joint stage).
            if p > stagnant_window_size and np.mean(past_k_steps) < self.config.early_stopping_delta: #3.0: #3.0:
                print('Early Stopping, go to next frame!')
                #losses['early_stopping'] = past_k_steps
                #wandb.log(losses)
                #wandb.log({'early_stopping': wandb.Histogram(past_k_steps)})
                break
        #print('rate of change', np.mean(past_k_steps))
def render_and_save(self, batch,
                    visualizations=[[View.GROUND_TRUTH, View.LANDMARKS, View.HEATMAP], [View.COLOR_OVERLAY, View.SHAPE_OVERLAY, View.SHAPE]],
                    frame_dst='/video', save=True, dump_directly=False,
                    outer_iter = None,
                    is_camera : bool = False,
                    all_keyframes : bool = False,
                    timestep : int = 0,
                    is_final : bool = False,
                    selected_frames = None,
                    ):
    """Render the current tracking state for visualization / checkpointing.

    When ``visualizations == [[View.SHAPE]]`` this renders a shading-style
    normal-map preview, exports the posed mesh as a vertex-colored ``.glb``
    into ``self.mesh_folder`` and returns the preview as a uint8 HxWx3 array.
    Otherwise it assembles UV-/normal-loss visualizations into a PIL image,
    optionally saves a checkpoint (``save``), and returns that image.

    NOTE(review): the mutable default for ``visualizations`` is shared across
    calls; it is only compared, never mutated here, so this looks harmless.
    ``is_camera``, ``all_keyframes`` and ``dump_directly`` are accepted but
    unused in this body.
    """
    batch = self.to_cuda(batch)
    images, landmarks, _ = self.parse_landmarks(batch)
    if 'uv_map' in batch:
        uv_map = batch['uv_map']
        uv_mask = batch['uv_mask']
        # Zero UV predictions outside the UV mask (in place on the batch tensor).
        uv_map[(1-uv_mask).bool()] = 0
    else:
        uv_map = None
        uv_mask = None
    if 'normals' in batch:
        normal_map = batch['normals']
    else:
        normal_map = None
    if 'normal_map_can' in batch:
        normal_map_can = batch['normal_map_can']
    else:
        normal_map_can = None
    savefolder = self.save_folder + self.actor_name + frame_dst
    num_keyframes = 1#1 + len(self.config.keyframes)
    with torch.no_grad():
        self.diff_renderer.reset()
        num_views = len(self.R_base.keys())
        bs = batch['normals'].shape[0] * num_keyframes #self.shape.shape[0]
        # Fetch parameters either directly (online stage) or by indexing the
        # per-frame embedding tables (joint stage).
        if selected_frames is None:
            exp = self.exp
            eyes = self.eyes
            eyelids = self.eyelids
            R = self.R
            t = self.t
            jaw = self.jaw
            neck = self.neck
            focal_length = self.focal_length
            principal_point = self.principal_point
        else:
            exp = self.exp(selected_frames)
            eyes = self.eyes(selected_frames)
            eyelids = self.eyelids(selected_frames)
            R = self.R(selected_frames)
            t = self.t(selected_frames)
            jaw = self.jaw(selected_frames)
            neck = self.neck(selected_frames)
            if not self.config.global_camera:
                focal_length = self.focal_length(selected_frames)
                principal_point = self.principal_point(selected_frames)
            else:
                focal_length = self.focal_length
                principal_point = self.principal_point
        # Rebuild model-view-projection matrices for the current camera estimate.
        with torch.no_grad():
            intrinsics = get_intrinsics(focal_length, principal_point, use_hack=False, size=self.config.size)
            proj_512 = nvdiffrast_util.intrinsics2projection(intrinsics,
                                                             znear=0.1, zfar=5,
                                                             width=self.config.size,
                                                             height=self.config.size)
            for serial in self.cam_pose_nvd.keys():
                extr = get_extrinsics(self.R_base[serial], self.t_base[serial])
                r_mvps = torch.matmul(proj_512, extr.repeat(bs, 1, 1))
                self.r_mvps[serial] = r_mvps
        # Evaluate FLAME, then apply head rotation/translation and project.
        vertices_can, _lmk68, lmkMP, vertices_can_can, vertices_noneck = self.flame(
            #cameras=torch.inverse(self.R_base[0]),
            cameras=torch.inverse(self.R_base[0]).repeat(bs, 1, 1),
            shape_params=self.shape.repeat(bs, 1),
            expression_params=exp.repeat_interleave(num_views, dim=0), #torch.from_numpy(self.expression_params[:1, :]).cuda().repeat(bs, 1), #self.exp,
            eye_pose_params=eyes.repeat_interleave(num_views, dim=0),
            #euler_angles_to_matrix(x_opts['rotation'][i], 'XYZ')
            jaw_pose_params=jaw.repeat_interleave(num_views, dim=0), #matrix_to_rotation_6d(euler_angles_to_matrix(torch.from_numpy(self.jaw_params[:1, :]).cuda(), 'XYZ')).repeat(bs, 1), #self.jaw,
            neck_pose_params=neck.repeat_interleave(num_views, dim=0), #matrix_to_rotation_6d(euler_angles_to_matrix(torch.from_numpy(self.jaw_params[:1, :]).cuda(), 'XYZ')).repeat(bs, 1), #self.jaw,
            eyelid_params=eyelids.repeat_interleave(num_views, dim=0),
            rot_params_lmk_shift=(matrix_to_rotation_6d(torch.inverse(rotation_6d_to_matrix(R)))).repeat_interleave(num_views, dim=0),
        )
        lmk68 = torch.einsum('bny,bxy->bnx', _lmk68, rotation_6d_to_matrix(R.repeat_interleave(num_views, dim=0))) + t.repeat_interleave(num_views, dim=0).unsqueeze(1)
        vertices = torch.einsum('bny,bxy->bnx', vertices_can, rotation_6d_to_matrix(R.repeat_interleave(num_views, dim=0))) + t.repeat_interleave(num_views, dim=0).unsqueeze(1)
        vertices_noneck = torch.einsum('bny,bxy->bnx', vertices_noneck, rotation_6d_to_matrix(R.repeat_interleave(num_views, dim=0))) + t.repeat_interleave(num_views, dim=0).unsqueeze(1)
        lmk68 = project_points_screen_space(lmk68, focal_length, principal_point, self.R_base, self.t_base, size=self.config.size)
        proj_vertices = project_points_screen_space(vertices, focal_length, principal_point, self.R_base, self.t_base, size=self.config.size)
        _timestep = 0
        ops = self.diff_renderer(vertices, None, None,
                                 self.r_mvps[_timestep], self.R_base[_timestep], self.t_base[_timestep],
                                 verts_can=vertices_can,
                                 verts_noneck=vertices_noneck,
                                 verts_depth=proj_vertices[:, :, 2:3],
                                 is_viz=True
                                 )
        # if they asked *only* for the pure shape mask:
        if visualizations == [[View.SHAPE]]:
            # build your normal‐map preview as before
            normals = ops['normal_images'][0].cpu().numpy()  # [3,H,W]
            normals = (normals + 1.0) / 2.0  # → [0,1]
            normals = np.transpose(normals, (1,2,0))  # H×W×3
            arr = (normals * 255).clip(0,255).astype(np.uint8)
            # --- export the posed mesh, using the correct face indices field ---
            os.makedirs(self.mesh_folder, exist_ok=True)
            frame_id = str(0).zfill(5)
            ply_path = os.path.join(self.mesh_folder, f"{frame_id}.glb")
            # pull out the face index tensor
            faces_np = self.faces.verts_idx.cpu().numpy()
            # `vertices` is your posed mesh: shape (1, V, 3)
            verts_np = vertices[0].detach().cpu().numpy()
            # 1) build your mesh (this will compute smooth normals automatically)
            mesh = trimesh.Trimesh(vertices=verts_np, faces=faces_np)
            # 2) fetch those normals: shape is (V,3), each component in [-1,1]
            normals = mesh.vertex_normals  # (V,3) numpy array
            # 3) convert them to RGB in [0,255]:
            # (n+1)/2 maps [-1,1]→[0,1], then *255→[0,255]
            colors = ((normals + 1.0) * 0.5 * 255.0).astype(np.uint8)  # (V,3)
            # 4) you need RGBA for many formats—just set alpha=255
            alpha = np.full((colors.shape[0],1), 255, dtype=np.uint8)
            vertex_colors = np.hstack([colors, alpha])  # (V,4)
            # 5) assign those as your mesh’s visual colors
            mesh.visual.vertex_colors = vertex_colors
            # 6) export—PLY or GLB both support vertex colors
            out_path = os.path.join(self.mesh_folder, f"{frame_id}.glb")
            mesh.export(out_path)
            return arr
        mask = (self.parse_mask(ops, batch, visualization=True) > 0).float()
        # Per-vertex visibility from the rendered depth (same test as in opt_post).
        grabbed_depth = ops['actual_rendered_depth'][0, 0,
                        torch.clamp(proj_vertices[0, :, 1].long(), 0, self.config.size-1),
                        torch.clamp(proj_vertices[0, :, 0].long(), 0, self.config.size-1),
                        ]
        is_visible_verts_idx = grabbed_depth < proj_vertices[0, :, 2] + 1e-2
        if not self.config.occ_filter:
            is_visible_verts_idx = torch.ones_like(is_visible_verts_idx)
        if outer_iter is None:
            frame_id = str(self.frame).zfill(5)
        else:
            frame_id = str(self.frame + 10*outer_iter).zfill(5)
        if uv_map is not None and is_final:
            # uv losses visualizations
            proj_vertices = proj_vertices[:, self.uv_loss_fn.valid_vertex_index, :]
            can_uv = torch.from_numpy(np.load(env_paths.FLAME_UV_COORDS)).cuda().unsqueeze(0).float()[:, self.uv_loss_fn.valid_vertex_index, :]
            valid_verts_visibility = is_visible_verts_idx[self.uv_loss_fn.valid_vertex_index]
            #can_uv[..., 0] = (can_uv[..., 0] * -1) + 1
            can_uv[..., 1] = (can_uv[..., 1] * -1) + 1
            #can_uv = can_uv[:, ::50, :]
            gt_uv = uv_map[:, :2, :, :].permute(0, 2, 3, 1)
            gt_uv = gt_uv.reshape(gt_uv.shape[0], -1, 2)  # B x n_pixel x 2
            can_uv = can_uv.repeat(gt_uv.shape[0], 1, 1)
            # Nearest-neighbor match each canonical vertex UV against the pixel UVs.
            knn_result = knn_points(can_uv, gt_uv)
            pixel_position_width = knn_result.idx % uv_map.shape[-1]
            pixel_position_height = knn_result.idx // uv_map.shape[-2]
            dists = knn_result.dists.clone()
            gt_2_verts = torch.cat([pixel_position_width, pixel_position_height], dim=-1)
            pred_normals = ops['normal_images']  # 1 3 512 512 normals in world space
            rot_mat = rotation_6d_to_matrix(R.detach().repeat_interleave(num_views, dim=0))  # 1 3 3
            pred_normals_flame_space = torch.einsum('bxy,bxhw->byhw', rot_mat, pred_normals)
            delta = self.config.uv_loss.delta_uv
            catted_uv_rows = []
            for b_i in range(images.shape[0]):
                empty = images[b_i].detach().cpu().numpy().copy().transpose(1, 2, 0)
                # Keep correspondences that are both close in UV space and visible.
                is_valid_uv_corresp = (dists[b_i, :, 0] < delta) & valid_verts_visibility
                valid_pred_2d = proj_vertices[b_i, is_valid_uv_corresp, :]
                valid_gt_2d = gt_2_verts[b_i, is_valid_uv_corresp, :]
                pixels_pred = torch.stack(
                    [
                        torch.clamp(valid_pred_2d[:, 0], 0, images.shape[-1] - 1),
                        torch.clamp(valid_pred_2d[:, 1], 0, images.shape[-2] - 1),
                    ], dim=-1
                ).int()
                pixels_gt = torch.stack(
                    [
                        torch.clamp(valid_gt_2d[:, 0], 0, images.shape[-1] - 1),
                        torch.clamp(valid_gt_2d[:, 1], 0, images.shape[-2] - 1),
                    ], dim=-1
                ).int()
                if self.config.draw_uv_corresp:
                    empty = plot_points(empty, pts=pixels_pred.detach().cpu().numpy(), pts2=pixels_gt.detach().cpu().numpy())
                gt_uv = uv_map[:, :2, :, :].permute(0, 2, 3, 1)
                upper_forehead = ((uv_map[:, 0, :, :].abs() < 0.85) &
                                  (uv_map[:, 0, :, :].abs() > (1 - 0.85)) &
                                  (uv_map[:, 1, :, :] < 0.35) &
                                  (uv_map[:, 1, :, :] > 0.)).float()
                upper_forehead = (gaussian_blur(upper_forehead, [self.config.normal_mask_ksize, self.config.normal_mask_ksize], sigma=[self.config.normal_mask_ksize, self.config.normal_mask_ksize]) > 0).float()
                losses_sil = ((1 - upper_forehead[:, None, :, :]) * (batch['fg_mask'] - ops['fg_images'])).abs().permute(0, 2, 3, 1)
                uv_loss = ((gt_uv - ops['uv_images']) * ops['mask_images'][:, 0, ...].unsqueeze(-1)).abs()
                #catted_uv = torch.cat([gt_uv[b_i], ops['uv_images'][b_i], uv_loss[b_i]], dim=1).detach().cpu().numpy()
                catted_uv = torch.cat([losses_sil[b_i][..., :2], uv_loss[b_i]], dim=1).detach().cpu().numpy()
                catted_uv_I = np.zeros([catted_uv.shape[0], catted_uv.shape[1], 3])
                catted_uv_I[:, :, :2] = catted_uv
                catted_uv_I = (catted_uv_I * 255).astype(np.uint8)
                shape_mask = ((ops['alpha_images'] * ops['mask_images_mesh']) > 0.).int()[b_i]
                shape = (pred_normals_flame_space[b_i]+1)/2 * shape_mask
                # Blend the shaded shape over the input image.
                blend = images[b_i] * (1 - shape_mask) + images[b_i] * shape_mask * 0.3 + shape * 0.7 * shape_mask
                to_be_catted = [(images[b_i].cpu().permute(1, 2, 0).numpy()*255).astype(np.uint8),
                                (blend.permute(1, 2, 0).detach().cpu().numpy()*255).astype(np.uint8),
                                ]
                if self.config.draw_uv_corresp:
                    to_be_catted.append(catted_uv_I)
                    to_be_catted.append(empty)
                catted_uv_I = np.concatenate(to_be_catted, axis=1)
                catted_uv_rows.append(catted_uv_I)
            if normal_map is None:
                catted_uv_I = Image.fromarray(np.concatenate(catted_uv_rows, axis=0))
                #pl = pv.Plotter()
                #pl.add_mesh(trim)
                #pl.add_points(visible_verts)
                #pl.show()
        else:
            catted_uv_I = None
            catted_uv_rows = []
        if normal_map is not None:
            # Normal-supervision visualization (optionally with the loss map).
            # NOTE(review): ``pred_normals_flame_space`` is only assigned in the
            # uv branch above, so this branch appears to rely on uv_map / is_final
            # being set whenever normal_map is present -- confirm.
            dilated_eye_mask = 1 - (gaussian_blur(ops['mask_images_eyes'], [self.config.normal_mask_ksize, self.config.normal_mask_ksize], sigma=[1, 1]) > 0).float()
            l_map = (normal_map - pred_normals_flame_space)
            valid = ((l_map.abs().sum(dim=1)/3) < self.config.delta_n).unsqueeze(1)
            predicted_normal = ((pred_normals_flame_space.permute(0, 2, 3, 1)[...,
                                 :3] + 1) / 2 * 255).detach().cpu().numpy().astype(np.uint8)
            if self.config.draw_uv_corresp:
                normal_loss_map = l_map * valid.float() * batch["normal_mask"] * dilated_eye_mask
                pseudo_normal = ((normal_map.permute(0, 2, 3, 1) + 1) / 2 * 255).detach().cpu().numpy().astype(
                    np.uint8)
                normal_loss_map = (
                        (normal_loss_map.abs().permute(0, 2, 3, 1)) / 2 * 255).detach().cpu().numpy().astype(
                    np.uint8)
                catted = np.concatenate([pseudo_normal, predicted_normal, normal_loss_map], axis=2)
            else:
                catted = predicted_normal
            # Image.fromarray(catted).show()
            # print('hi')
            # Append the normal visualization to the UV rows (or start new rows).
            for b_i in range(catted.shape[0]):
                if len(catted_uv_rows) > 0:
                    catted_uv_rows[b_i] = np.concatenate([catted_uv_rows[b_i], catted[b_i]], axis=1)
                else:
                    catted_uv_rows.append(catted[b_i])
            catted_uv_I = Image.fromarray(np.concatenate(catted_uv_rows, axis=0))
        #if catted_uv_I is not None:
        #    save_fodler_uv = f'{savefolder}'
        #    os.makedirs(save_fodler_uv, exist_ok=True)
        #    if is_final:
        #        catted_uv_I.save(f'{save_fodler_uv}/{timestep}.png')
        #    else:
        #        catted_uv_I.save(f'{save_fodler_uv}/{self.frame}.png')
        if not save:
            return
        # CHECKPOINT
        self.save_checkpoint(timestep, selected_frames=selected_frames)
        return catted_uv_I
def parse_landmarks(self, batch):
    """Split a data batch into images, 68-point landmarks and a validity mask.

    When ``batch`` carries a ``'lmk'`` entry (98-point WFLW layout), the 68
    iBUG points are selected via ``WFLW_2_iBUG68`` and, as a side effect, the
    iris points (indices 96/97) and their validity masks are written back into
    ``batch``. A landmark row summing to zero is treated as invalid.

    Returns:
        (images, landmarks68 or None, landmark_mask or None)
    """
    images = batch['rgb']
    if 'lmk' not in batch:
        # No landmark annotations for this batch.
        return images, None, None
    raw_lmks = batch['lmk']
    lmk68 = raw_lmks[:, WFLW_2_iBUG68, :]
    lmk_mask = ~(lmk68.sum(2, keepdim=True) == 0)
    # All-zero rows mark missing detections; nonzero rows are valid.
    nonzero_rows = ~(raw_lmks.sum(2, keepdim=True) == 0)
    batch['left_iris'] = raw_lmks[:, 96:97, :]
    batch['right_iris'] = raw_lmks[:, 97:98, :]
    batch['mask_left_iris'] = nonzero_rows[:, 96:97, :]
    batch['mask_right_iris'] = nonzero_rows[:, 97:98, :]
    return images, lmk68, lmk_mask
def read_data(self, timestep):
    """Load and preprocess every per-frame input needed by the tracker.

    For frame ``timestep`` this reads: the cropped RGB image, the MICA
    identity code (averaged over all detections, or just the first when
    ``early_exit`` is set), the face-parsing segmentation (from which the
    UV / normal / foreground / background masks are derived), the Pixel3DMM
    normal and UV predictions, and the PIPnet landmarks. All image inputs are
    resized to ``self.config.size``.

    Returns:
        dict of float CUDA tensors with a leading batch dimension of 1;
        image-like entries are channels-first (B, C, H, W).
    """
    DATA_FOLDER = f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}'
    P3DMM_FOLDER = f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/p3dmm/'
    # Cropped RGB frame in [0, 1]; stored as .jpg or (fallback) .png.
    try:
        rgb = np.array(Image.open(f'{DATA_FOLDER}/cropped/{timestep:05d}.jpg').resize((self.config.size, self.config.size))) / 255
    except Exception as ex:
        rgb = np.array(Image.open(f'{DATA_FOLDER}/cropped/{timestep:05d}.png').resize((self.config.size, self.config.size))) / 255
    # MICA identity code: mean over all detections (first one only when
    # early_exit is set, to save time in debug runs).
    mica_folder = f'{DATA_FOLDER}/mica'
    mica_files = os.listdir(mica_folder)
    mica_shapes = []
    for mica_file in mica_files:
        mica_shape = np.load(f'{mica_folder}/{mica_file}/identity.npy')
        mica_shapes.append(np.squeeze(mica_shape))
    mica_shapes = np.stack(mica_shapes, axis=0)
    if self.config.early_exit:
        mica_shape = mica_shapes[0, :]
    else:
        mica_shape = np.mean(mica_shapes, axis=0)
    # Face-parsing segmentation; NEAREST resize keeps the integer labels intact.
    seg = np.array(Image.open(f'{DATA_FOLDER}/seg_og/{timestep:05d}.png').resize((self.config.size, self.config.size), Image.NEAREST))
    if len(seg.shape) == 3:
        seg = seg[..., 0]
    # Label semantics below follow the original inline comments (1: neck,
    # 4/5: ears, 11: mouth interior) -- TODO confirm the full label map.
    uv_mask = ((seg == 2) | (seg == 6) | (seg == 7) |
               (seg == 10) | (seg == 12) | (seg == 13) |
               (seg == 1) |  # neck
               (seg == 4) | (seg == 5)  # ears
               )
    normal_mask = ((seg == 2) | (seg == 6) | (seg == 7) |
                   (seg == 10) | (seg == 12) | (seg == 13)
                   ) | (seg == 11)  # mouth interior
    if self.config.big_normal_mask:
        normal_mask = normal_mask | (seg == 1) | (seg == 4) | (seg == 5)  # add neck and ears
    fg_mask = ((seg == 2) | (seg == 6) | (seg == 7) | (seg == 8) | (seg == 9) |  # (seg == 4) | (seg == 5) |
               (seg == 10) | (seg == 12) | (seg == 13)
               )
    valid_bg = seg <= 1
    # Pixel3DMM network predictions; normals are mapped from [0, 1] to [-1, 1].
    # BUGFIX: the uv_map filename used to read '{timestep:05d}png' (missing the
    # '.' before the extension), so the original try-branch always failed and
    # fell into a duplicated fallback that re-read the normals a second time.
    normals = ((np.array(Image.open(f'{P3DMM_FOLDER}/normals/{timestep:05d}.png').resize((self.config.size, self.config.size))) / 255).astype(np.float32) - 0.5) * 2
    uv_map = (np.array(Image.open(f'{P3DMM_FOLDER}/uv_map/{timestep:05d}.png').resize((self.config.size, self.config.size))) / 255).astype(np.float32)
    # Landmarks are optional: fall back to zeros, which downstream masking
    # (rows summing to zero) treats as invalid.
    try:
        lms = np.load(f'{DATA_FOLDER}/PIPnet_landmarks/{timestep:05d}.npy') * self.config.size
    except Exception as ex:
        lms = np.zeros([98, 2])
    ret_dict = {
        'rgb': rgb,
        'mica_shape': mica_shape,
        'normals': normals,
        'uv_map': uv_map,
        'uv_mask': uv_mask,
        'normal_mask': normal_mask,
        'fg_mask': fg_mask,
        'valid_bg': valid_bg,
        'lmk': lms,
    }
    # Move everything to CUDA float tensors with a batch dimension of 1.
    ret_dict = {k: torch.from_numpy(v).float().unsqueeze(0).cuda() for k, v in ret_dict.items()}
    # Broadcast single-channel masks to 3 channels to match image tensors.
    ret_dict['uv_mask'] = ret_dict['uv_mask'][:, :, :, None].repeat(1, 1, 1, 3)
    ret_dict['normal_mask'] = ret_dict['normal_mask'][:, :, :, None].repeat(1, 1, 1, 3)
    ret_dict['fg_mask'] = ret_dict['fg_mask'][:, :, :, None].repeat(1, 1, 1, 3)
    channels_first = ['rgb', 'uv_mask', 'normal_mask', 'normals', 'uv_map', 'fg_mask']
    for k in channels_first:
        ret_dict[k] = ret_dict[k].permute(0, 3, 1, 2)
    return ret_dict
def prepare_global_optimization(self, N_FRAMES):
    """Switch from per-frame attributes to sparse per-frame embedding tables.

    Builds one sparse ``nn.Embedding`` (rows = frames) per optimized quantity
    and seeds its weights with the values collected during the online stage
    (``self.intermediate_*``), so the joint stage can optimize all frames at
    once with SparseAdam.

    Args:
        N_FRAMES: number of tracked frames, i.e. rows per table.
    """
    is_sparse = True

    def _table(dim):
        # One row per frame; sparse so SparseAdam only touches sampled rows.
        return nn.Embedding(num_embeddings=N_FRAMES, embedding_dim=dim, sparse=is_sparse).cuda()

    self.exp = _table(100)
    self.R = _table(6)
    self.t = _table(3)
    self.eyes = _table(12)
    self.eyelids = _table(12)
    self.jaw = _table(6)
    self.neck = _table(6)
    per_frame_camera = not self.config.global_camera
    if per_frame_camera:
        self.focal_length = _table(1)
        self.principal_point = _table(2)
    # Stack the per-frame snapshots recorded after each online-stage frame.
    exp = torch.cat(self.intermediate_exprs, dim=0)
    R = torch.cat(self.intermediate_Rs, dim=0)
    t = torch.cat(self.intermediate_ts, dim=0)
    eyes = torch.cat(self.intermediate_eyes, dim=0)
    eyelids = torch.cat(self.intermediate_eyelids, dim=0)
    jaw = torch.cat(self.intermediate_jaws, dim=0)
    neck = torch.cat(self.intermediate_necks, dim=0)
    if per_frame_camera:
        focal_length = torch.cat(self.intermediate_fls, dim=0)
        principal_point = torch.cat(self.intermediate_pps, dim=0)
    # Seed the embedding weights with those values (no autograd history).
    with torch.no_grad():
        self.exp.weight = torch.nn.Parameter(exp)
        self.R.weight = torch.nn.Parameter(R)
        self.t.weight = torch.nn.Parameter(t)
        self.eyes.weight = torch.nn.Parameter(eyes)
        self.eyelids.weight = torch.nn.Parameter(eyelids)
        self.jaw.weight = torch.nn.Parameter(jaw)
        self.neck.weight = torch.nn.Parameter(neck)
        if per_frame_camera:
            self.focal_length.weight = torch.nn.Parameter(focal_length)
            self.principal_point.weight = torch.nn.Parameter(principal_point)
def run(self):
    """Full tracking pipeline.

    Stage 1 (online): iterate over frames, optimize each frame's parameters
    sequentially and cache data plus per-frame results. Stage 2 (global):
    stack all cached frames, re-parameterize as sparse embeddings and run a
    joint refinement. Finally render per-frame shape previews to disk and
    optionally delete preprocessing artifacts.
    """
    timestep = self.config.start_frame
    batch = self.read_data(timestep=timestep)
    # Important to initialize
    self.create_parameters(0, batch['mica_shape'])
    self.frame = 0
    print('''
    <<<<<<<< STARTING ONLINE TRACKING PHASE >>>>>>>>
    ''')
    for timestep in range(self.config.start_frame, self.MAX_STEPS + self.config.start_frame, self.FRAME_SKIP):
        batch = self.read_data(timestep=timestep)
        # Cache the per-frame tensors for the later joint stage.
        for k in batch.keys():
            if k not in self.cached_data:
                self.cached_data[k] = [batch[k]]
            else:
                self.cached_data[k].append(batch[k])
        if timestep == self.config.start_frame:
            # First frame: long camera optimization, identity left free.
            self.optimize_camera(batch, steps=500, is_first_frame=True)
            params = lambda: self.clone_params_keyframes_all(freeze_id=False, freeze_cam=self.config.global_camera, include_neck=self.config.include_neck)
            is_first_step = True
        else:
            if self.config.extra_cam_steps:
                self.optimize_camera(batch, steps=10, is_first_frame=False)
            # Subsequent frames: identity frozen.
            params = lambda: self.clone_params_keyframes_all(freeze_id=True, freeze_cam=self.config.global_camera, include_neck=self.config.include_neck)
            is_first_step = False
        self.optimize_color(batch, params,
                            no_lm=self.no_lm,
                            save_timestep=timestep,
                            is_first_step=is_first_step
                            )
        self.uv_loss_fn.is_next()
        #self.checkpoint(batch, visualizations=[[View.GROUND_TRUTH, View.COLOR_OVERLAY, View.LANDMARKS, View.SHAPE]], frame_dst='/initialization', outer_iter=0, timestep=timestep, is_final=True, save=True)
        self.frame += 1
        # save results for global optimization later
        self.intermediate_exprs.append(self.exp.detach().clone())
        self.intermediate_Rs.append(self.R.detach().clone())
        self.intermediate_ts.append(self.t.detach().clone())
        self.intermediate_eyes.append(self.eyes.detach().clone())
        self.intermediate_eyelids.append(self.eyelids.detach().clone())
        self.intermediate_jaws.append(self.jaw.detach().clone())
        self.intermediate_necks.append(self.neck.detach().clone())
        if not self.config.global_camera:
            self.intermediate_fls.append(self.focal_length.detach().clone())
            self.intermediate_pps.append(self.principal_point.detach().clone())
        if self.config.early_exit:
            # NOTE(review): debugging shortcut -- terminates the whole process
            # right after the first tracked frame; in-loop placement assumed
            # from the stripped source indentation, confirm against upstream.
            exit()
    # Stack the cached per-frame data for the joint stage.
    for k in self.cached_data.keys():
        self.cached_data[k] = torch.cat(self.cached_data[k], dim=0)
    params = lambda: self.clone_params_keyframes_all_joint(freeze_id=False, is_joint=True, include_neck=self.config.include_neck)
    if self.config.uv_map_super > 0.0:
        self.uv_loss_fn.finish_stage1()
    self.config.iters = self.config.global_iters #self.config.iters * 10
    N_FRAMES = len(self.intermediate_exprs)
    #build optimization targets for global optimization, implement as sparse torch.Embedding
    self.prepare_global_optimization(N_FRAMES=N_FRAMES)
    if COMPILE:
        self.flame = torch.compile(self.flame)
        self.opt_pre = torch.compile(self.opt_pre)
        self.opt_post = torch.compile(self.opt_post)
    print('''
    <<<<<<<< STARTING GLOBAL TRACKING PHASE >>>>>>>>
    ''')
    if N_FRAMES > 1:
        self.optimize_color(None, params,
                            no_lm=self.no_lm,
                            save_timestep=1000, #timestep,
                            is_joint=True,
                            )
    # render result and save it as a video to get some viusal feedback
    video_frames = []
    for it, timestep in enumerate(range(self.config.start_frame, self.MAX_STEPS + self.config.start_frame, self.FRAME_SKIP)):
        selected_frames = []
        selected_frames_loading = []
        batches = []
        batch = self.read_data(timestep=timestep)
        batches.append(batch)
        selected_frames.append(it)
        selected_frames_loading.append(timestep)
        batches = {k: torch.cat([x[k] for x in batches], dim=0) for k in batch.keys()}
        selected_frames = torch.from_numpy(np.array(selected_frames)).long().cuda()
        result_rendering = self.render_and_save(batch,
                                                visualizations=[[View.SHAPE]],  # ← only mesh by default
                                                frame_dst='/video', save=True, dump_directly=False, outer_iter=0, timestep=timestep, is_final=True, selected_frames=selected_frames)
        video_frames.append(np.array(result_rendering))
        self.frame += 1
    out_dir = f"{self.save_folder}/{self.config.video_name}/frames"
    os.makedirs(out_dir, exist_ok=True)
    for i, frame in enumerate(video_frames):
        # If float in [0,1], convert:
        if frame.dtype != np.uint8:
            frame_uint8 = (frame * 255).astype(np.uint8)
        else:
            frame_uint8 = frame
        # OpenCV expects BGR ordering:
        bgr = cv2.cvtColor(frame_uint8, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(out_dir, f"{i:05d}.jpg"), bgr)
    print(f"✅ Saved {len(video_frames)} frames to `{out_dir}`")
    # Optionally delete all preoprocessing artifacts, once tracking is done (only keep cropped images)
    if self.config.delete_preprocessing:
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/mica')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/p3dmm')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/p3dmm_wGT')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/p3dmm_extraViz')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/pipnet')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/PIPnet_annotated_images')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/PIPnet_landmarks')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/rgb')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/seg_non_crop_annotations')
        shutil.rmtree(f'{env_paths.PREPROCESSED_DATA}/{self.config.video_name}/seg_og')
    print(f'''
    <<<<<<<< DONE WITH TRACKING {self.actor_name} >>>>>>>>
    ''')