pixel3dmms

Build error

App Files Files Community

alexnasa commited on Jun 11, 2025

Commit

be22660

verified ·

1 Parent(s): 901df39

Upload network_inference.py

Browse files

Files changed (1) hide show

src/pixel3dmm/network_inference.py +191 -0

src/pixel3dmm/network_inference.py ADDED Viewed

	@@ -0,0 +1,191 @@

+import traceback
+from tqdm import tqdm
+import os
+import torch
+import numpy as np
+from PIL import Image
+from omegaconf import OmegaConf
+from time import time
+from pixel3dmm.utils.uv import uv_pred_to_mesh
+from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system
+#from pixel3dmm.lightning.system_flame_params_legacy import system as system_flame_params_legacy
+from pixel3dmm import env_paths
+def pad_to_3_channels(img):
+    if img.shape[-1] == 3:
+        return img
+    elif img.shape[-1] == 1:
+        return np.concatenate([img, np.zeros_like(img[..., :1]), np.zeros_like(img[..., :1])], axis=-1)
+    elif img.shape[-1] == 2:
+        return np.concatenate([img, np.zeros_like(img[..., :1])], axis=-1)
+    else:
+        raise ValueError('too many dimensions in prediction type!')
+def gaussian_fn(M, std):
+    n = torch.arange(0, M) - (M - 1.0) / 2.0
+    sig2 = 2 * std * std
+    w = torch.exp(-n ** 2 / sig2)
+    return w
+def gkern(kernlen=256, std=128):
+    """Returns a 2D Gaussian kernel array."""
+    gkern1d_x = gaussian_fn(kernlen, std=std * 5)
+    gkern1d_y = gaussian_fn(kernlen, std=std)
+    gkern2d = torch.outer(gkern1d_y, gkern1d_x)
+    return gkern2d
+valid_verts = np.load(f'{env_paths.VALID_VERTICES_WIDE_REGION}')
+def normals_n_uvs(cfg, model):
+    if cfg.model.prediction_type == 'flame_params':
+        cfg.data.mirror_aug = False
+    # data loader
+    if cfg.model.feature_map_type == 'DINO':
+        feature_map_size = 32
+    elif cfg.model.feature_map_type == 'sapiens':
+        feature_map_size = 64
+    batch_size = 1  # cfg.inference_batch_size
+    prediction_types = cfg.model.prediction_type.split(',')
+    conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=11, bias=False, padding='same')
+    g_weights = gkern(11, 2)
+    g_weights /= torch.sum(g_weights)
+    conv.weight = torch.nn.Parameter(g_weights.unsqueeze(0).unsqueeze(0))
+    OUT_NAMES = str(cfg.video_name).split(',')
+    print(f"""
+            <<<<<<<< STARTING PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
+            """)
+    for OUT_NAME in OUT_NAMES:
+        folder = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/'
+        IMAGE_FOLDER = f'{folder}/cropped'
+        SEGMENTATION_FOLDER = f'{folder}/seg_og/'
+        out_folders = {}
+        out_folders_wGT = {}
+        out_folders_viz = {}
+        for prediction_type in prediction_types:
+            out_folders[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm/{prediction_type}/'
+            out_folders_wGT[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm_wGT/{prediction_type}/'
+            os.makedirs(out_folders[prediction_type], exist_ok=True)
+            os.makedirs(out_folders_wGT[prediction_type], exist_ok=True)
+            out_folders_viz[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{OUT_NAME}/p3dmm_extraViz/{prediction_type}/'
+            os.makedirs(out_folders_viz[prediction_type], exist_ok=True)
+        image_names = os.listdir(f'{IMAGE_FOLDER}')
+        image_names.sort()
+        if os.path.exists(out_folders[prediction_type]):
+            if len(os.listdir(out_folders[prediction_type])) == len(image_names):
+                return
+        for i in tqdm(range(len(image_names))):
+            try:
+                img = np.array(Image.open(f'{IMAGE_FOLDER}/{image_names[i]}').resize((512, 512))) / 255 # need 512,512 images as input; normalize to [0, 1] range
+                img = torch.from_numpy(img)[None, None].float().cuda() # 1,1,512,512,3
+                img_seg = np.array(Image.open(f'{SEGMENTATION_FOLDER}/{image_names[i][:-4]}.png').resize((512, 512), Image.NEAREST))
+                if len(img_seg.shape) == 3:
+                    img_seg = img_seg[..., 0]
+                #img_seg = np.array(Image.open(f'{SEGEMNTATION_FOLDER}/{int(image_names[i][:-4])*3:05d}.png').resize((512, 512), Image.NEAREST))
+                mask = ((img_seg == 2) | ((img_seg > 3) & (img_seg < 14)) ) &  ~(img_seg==11)
+                mask = torch.from_numpy(mask).long().cuda()[None, None] # 1, 1, 512, 512
+                #mask = torch.ones_like(img[..., 0]).cuda().bool()
+                batch = {
+                    'tar_msk': mask,
+                    'tar_rgb': img,
+                }
+                batch_mirrored = {
+                'tar_rgb': torch.flip(batch['tar_rgb'], dims=[3]).cuda(),
+                'tar_msk': torch.flip(batch['tar_msk'], dims=[3]).cuda(),
+                }
+                with torch.no_grad():
+                    output, conf = model.net(batch)
+                    output_mirrored, conf = model.net(batch_mirrored)
+                    if 'uv_map' in output:
+                        fliped_uv_pred = torch.flip(output_mirrored['uv_map'], dims=[4])
+                        fliped_uv_pred[:, :, 0, :, :] *= -1
+                        fliped_uv_pred[:, :, 0, :, :] += 2*0.0075
+                        output['uv_map'] = (output['uv_map'] + fliped_uv_pred)/2
+                    if 'normals' in output:
+                        fliped_uv_pred = torch.flip(output_mirrored['normals'], dims=[4])
+                        fliped_uv_pred[:, :, 0, :, :] *= -1
+                        output['normals'] = (output['normals'] + fliped_uv_pred)/2
+                    if 'disps' in output:
+                        fliped_uv_pred = torch.flip(output_mirrored['disps'], dims=[4])
+                        fliped_uv_pred[:, :, 0, :, :] *= -1
+                        output['disps'] = (output['disps'] + fliped_uv_pred)/2
+                for prediction_type in prediction_types:
+                    for i_batch in range(batch_size):
+                        i_view = 0
+                        gt_rgb = batch['tar_rgb']
+                        # normalize to [0,1] range
+                        if prediction_type == 'uv_map':
+                            tmp_output = torch.clamp((output[prediction_type][i_batch, i_view] + 1) / 2, 0, 1)
+                        elif prediction_type == 'disps':
+                            tmp_output = torch.clamp((output[prediction_type][i_batch, i_view] + 50) / 100, 0, 1)
+                        elif prediction_type in ['normals', 'normals_can']:
+                            tmp_output = output[prediction_type][i_batch, i_view]
+                            tmp_output = tmp_output / torch.norm(tmp_output, dim=0).unsqueeze(0)
+                            tmp_output = torch.clamp((tmp_output + 1) / 2, 0, 1)
+                            # undo "weird" convention of normals that I used for preprocessing
+                            tmp_output = torch.stack(
+                                [tmp_output[0, ...], 1 - tmp_output[2, ...], 1 - tmp_output[1, ...]],
+                                dim=0)
+                        content = [
+                            gt_rgb[i_batch, i_view].detach().cpu().numpy(),
+                            pad_to_3_channels(tmp_output.permute(1, 2, 0).detach().cpu().float().numpy()),
+                        ]
+                        catted = (np.concatenate(content, axis=1) * 255).astype(np.uint8)
+                        Image.fromarray(catted).save(f'{out_folders_wGT[prediction_type]}/{image_names[i]}')
+                        Image.fromarray(
+                            pad_to_3_channels(
+                                tmp_output.permute(1, 2, 0).detach().cpu().float().numpy() * 255).astype(
+                                np.uint8)).save(
+                            f'{out_folders[prediction_type]}/{image_names[i][:-4]}.png')
+                        # this visulization is quite slow, therefore disable it per default
+                        if prediction_type == 'uv_map' and cfg.viz_uv_mesh:
+                            to_show_non_mirr = uv_pred_to_mesh(
+                                output[prediction_type][i_batch:i_batch + 1, ...],
+                                batch['tar_msk'][i_batch:i_batch + 1, ...],
+                                batch['tar_rgb'][i_batch:i_batch + 1, ...],
+                                right_ear = [537, 1334, 857, 554, 941],
+                                left_ear = [541, 476, 237, 502, 286],
+                            )
+                            Image.fromarray(to_show_non_mirr).save(f'{out_folders_viz[prediction_type]}/{image_names[i]}')
+            except Exception:
+                traceback.print_exc()
+    print(f"""
+                <<<<<<<< FINISHED PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
+                """)