"""This script contains the image preprocessing code for Deep3DFaceRecon_pytorch """ import numpy as np from scipy.io import loadmat from PIL import Image import cv2 import os from skimage import transform as trans import torch import warnings warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) warnings.filterwarnings("ignore", category=FutureWarning) # calculating least square problem for image alignment def POS(xp, x): npts = xp.shape[1] A = np.zeros([2*npts, 8]) A[0:2*npts-1:2, 0:3] = x.transpose() A[0:2*npts-1:2, 3] = 1 A[1:2*npts:2, 4:7] = x.transpose() A[1:2*npts:2, 7] = 1 b = np.reshape(xp.transpose(), [2*npts, 1]) k, _, _, _ = np.linalg.lstsq(A, b) R1 = k[0:3] R2 = k[4:7] sTx = k[3] sTy = k[7] s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2 t = np.stack([sTx, sTy], axis=0) return t, s # resize and crop images for face reconstruction def resize_n_crop_img(img, lm, t, s, target_size=224., mask=None): w0, h0 = img.size w = (w0*s).astype(np.int32) h = (h0*s).astype(np.int32) left = (w/2 - target_size/2 + float((t[0] - w0/2)*s)).astype(np.int32) right = left + target_size up = (h/2 - target_size/2 + float((h0/2 - t[1])*s)).astype(np.int32) below = up + target_size img = img.resize((w, h), resample=Image.BICUBIC) img = img.crop((left, up, right, below)) if mask is not None: mask = mask.resize((w, h), resample=Image.BICUBIC) mask = mask.crop((left, up, right, below)) lm = np.stack([lm[:, 0] - t[0] + w0/2, lm[:, 1] - t[1] + h0/2], axis=1)*s lm = lm - np.reshape( np.array([(w/2 - target_size/2), (h/2-target_size/2)]), [1, 2]) return img, lm, mask # utils for face reconstruction def extract_5p(lm): lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 lm5p = np.stack([lm[lm_idx[0], :], np.mean(lm[lm_idx[[1, 2]], :], 0), np.mean( lm[lm_idx[[3, 4]], :], 0), lm[lm_idx[5], :], lm[lm_idx[6], :]], axis=0) lm5p = lm5p[[1, 2, 0, 3, 4], :] return lm5p # utils for face reconstruction def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): """ Return: transparams --numpy.array (raw_W, raw_H, scale, tx, ty) img_new --PIL.Image (target_size, target_size, 3) lm_new --numpy.array (68, 2), y direction is opposite to v direction mask_new --PIL.Image (target_size, target_size) Parameters: img --PIL.Image (raw_H, raw_W, 3) lm --numpy.array (68, 2), y direction is opposite to v direction lm3D --numpy.array (5, 3) mask --PIL.Image (raw_H, raw_W, 3) """ try: # Debug input shapes print(f"\n[DEBUG] Input shapes - lm: {lm.shape}, lm3D: {lm3D.shape}") if hasattr(lm, 'shape') else None w0, h0 = img.size print(f"[DEBUG] Original image size: {w0}x{h0}") # Extract 5 facial landmarks if lm.shape[0] != 5: lm5p = extract_5p(lm) else: lm5p = lm print(f"[DEBUG] Landmark points shape: {lm5p.shape}") # Calculate translation and scale factors t, s = POS(lm5p.transpose(), lm3D.transpose()) s = rescale_factor / s # Ensure t is a flat numpy array with exactly 2 elements t = np.array(t).flatten() if len(t) != 2: raise ValueError(f"Translation vector t should have 2 elements, got {len(t)}: {t}") print(f"[DEBUG] Calculated values - t: {t}, s: {s}") # Process the image img_new, lm_new, mask_new = resize_n_crop_img( img, lm, t, s, target_size=target_size, mask=mask ) # Create transformation parameters with type checking trans_params = np.array([ float(w0), # Convert to float explicitly float(h0), # Convert to float explicitly float(s), # Convert to float explicitly float(t[0]), # First translation component float(t[1]) # Second translation component ], dtype=np.float32) print(f"[DEBUG] Transformation params: {trans_params}") return trans_params, img_new, lm_new, mask_new except Exception as e: print(f"\n[ERROR] in align_img(): {str(e)}") print("[DEBUG] Problem occurred with:") print(f"- img size: {img.size if img else 'None'}") print(f"- lm shape: {lm.shape if hasattr(lm, 'shape') else 'Not an array'}") print(f"- lm3D shape: {lm3D.shape if hasattr(lm3D, 'shape') else 'Not an array'}") print(f"- t: {t if 't' in locals() else 'Not calculated'}") print(f"- s: {s if 's' in locals() else 'Not calculated'}") raise