Spaces:

dreamlessx
/

LandmarkDiff

Sleeping

App Files Files Community

LandmarkDiff / landmarkdiff /postprocess.py

dreamlessx

Upload landmarkdiff/postprocess.py with huggingface_hub

83b71db verified 3 months ago

raw

history blame

14.7 kB

	"""Post-processing: CodeFormer/GFPGAN face restore, Real-ESRGAN bg,
	Laplacian blend, sharpening, histogram matching, ArcFace identity gate.
	"""

	from __future__ import annotations

	import logging

	import cv2
	import numpy as np


	def laplacian_pyramid_blend(
	    source: np.ndarray,
	    target: np.ndarray,
	    mask: np.ndarray,
	    levels: int = 6,
	) -> np.ndarray:
	    """Blend ``source`` into ``target`` through a Laplacian pyramid.

	    Kills the 'pasted on' look of plain alpha blending: every frequency band
	    of the two images is mixed with a correspondingly down-sampled copy of
	    the mask, then the bands are summed back together.

	    Args:
	        source: Image used where the mask is high. Resized to the target's
	            height/width when they differ.
	        target: Image used where the mask is low. Defines the output size.
	        mask: Blend weights, 2-D or with a channel axis. If any value exceeds
	            1.0 the whole mask is divided by 255. Resized to the target's
	            height/width when they differ.
	        levels: Number of pyramid reductions. Negative values are treated
	            as 0 (a plain per-pixel blend).

	    Returns:
	        ``uint8`` image with the target's height and width.
	    """
	    levels = max(int(levels), 0)

	    h, w = target.shape[:2]
	    if source.shape[:2] != (h, w):
	        source = cv2.resize(source, (w, h))

	    # Normalize the mask to float32 weights in [0, 1].
	    mask_f = mask.astype(np.float32)
	    if mask_f.size and mask_f.max() > 1.0:
	        mask_f = mask_f / 255.0
	    # OpenCV drops a trailing singleton channel axis, so work on a 2-D mask
	    # and re-expand it explicitly below.
	    if mask_f.ndim == 3 and mask_f.shape[2] == 1:
	        mask_f = mask_f[:, :, 0]
	    if mask_f.shape[:2] != (h, w):
	        mask_f = cv2.resize(mask_f, (w, h))
	    if mask_f.ndim == 2 and target.ndim == 3:
	        mask_f = np.repeat(mask_f[:, :, None], target.shape[2], axis=2)

	    src_f = source.astype(np.float32)
	    tgt_f = target.astype(np.float32)

	    # Make both dimensions divisible by 2^levels. The images are PADDED on
	    # the bottom/right (not stretched), so cropping to [:h, :w] at the end
	    # recovers exactly the original geometry.
	    factor = 2 ** levels
	    pad_h = -h % factor
	    pad_w = -w % factor
	    if pad_h or pad_w:
	        src_f, tgt_f, mask_f = (
	            cv2.copyMakeBorder(layer, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT_101)
	            for layer in (src_f, tgt_f, mask_f)
	        )

	    # Gaussian pyramid of the mask: one weight map per band.
	    mask_pyr = [mask_f]
	    for _ in range(levels):
	        mask_pyr.append(cv2.pyrDown(mask_pyr[-1]))

	    # Laplacian pyramids of both images.
	    src_lap = _build_laplacian_pyramid(src_f, levels)
	    tgt_lap = _build_laplacian_pyramid(tgt_f, levels)

	    # Blend each band with the mask at the matching resolution.
	    blended_lap = []
	    for src_band, tgt_band, weights in zip(src_lap, tgt_lap, mask_pyr):
	        if weights.shape[:2] != src_band.shape[:2]:
	            weights = cv2.resize(weights, (src_band.shape[1], src_band.shape[0]))
	        blended_lap.append(src_band * weights + tgt_band * (1.0 - weights))

	    result = _reconstruct_from_laplacian(blended_lap)

	    # Remove the padding and convert back to 8-bit, rounding rather than
	    # truncating so the output is not biased darker.
	    result = result[:h, :w]
	    return np.clip(np.rint(result), 0, 255).astype(np.uint8)


	def _build_laplacian_pyramid(
	    image: np.ndarray,
	    levels: int,
	) -> list[np.ndarray]:
	    """Split ``image`` into ``levels`` detail bands plus a coarse residual.

	    Each band is a pyramid level minus the up-sampled next (smaller) level;
	    the final list entry is the smallest down-sampled image itself. The
	    returned list therefore has ``levels + 1`` entries, finest first.
	    """
	    bands: list[np.ndarray] = []
	    current = image.copy()
	    for _ in range(levels):
	        smaller = cv2.pyrDown(current)
	        rows, cols = current.shape[:2]
	        # pyrUp can come back one pixel larger than the level it came from.
	        expanded = cv2.pyrUp(smaller)[:rows, :cols]
	        bands.append(current - expanded)
	        current = smaller

	    bands.append(current)  # coarsest level
	    return bands


	def _reconstruct_from_laplacian(pyramid: list[np.ndarray]) -> np.ndarray:
	    """Collapse a Laplacian pyramid (finest band first) back into one image.

	    Starts from a copy of the last (coarsest) entry and, walking towards the
	    first entry, up-samples the running image, crops it to the band's
	    height/width and adds the band.
	    """
	    merged = pyramid[-1].copy()
	    for band in reversed(pyramid[:-1]):
	        rows, cols = band.shape[:2]
	        merged = cv2.pyrUp(merged)[:rows, :cols]
	        merged = merged + band
	    return merged


	def frequency_aware_sharpen(
	    image: np.ndarray,
	    strength: float = 0.3,
	    radius: int = 3,
	) -> np.ndarray:
	    """Unsharp mask on LAB luminance only - sharpens skin texture without color fringe.

	    Args:
	        image: BGR image. NOTE(review): the 0-255 clip and uint8 cast below
	            assume an 8-bit input - confirm against callers.
	        strength: Multiplier for the (luminance - blurred luminance) detail
	            that is added back. ``0`` returns an unmodified copy.
	        radius: Half-width of the Gaussian kernel; the kernel side is
	            ``2 * radius + 1``. Negative values are treated as 0.

	    Returns:
	        Sharpened BGR ``uint8`` image (a new array).
	    """
	    if strength == 0:
	        # Nothing to add back; skip the BGR -> LAB -> BGR round trip.
	        return image.copy()

	    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
	    luminance = lab[:, :, 0]

	    # Kernel side must be a positive odd number.
	    ksize = max(int(radius), 0) * 2 + 1
	    blurred = cv2.GaussianBlur(luminance, (ksize, ksize), 0)
	    sharpened = luminance + strength * (luminance - blurred)

	    # Round before the uint8 cast; a bare cast truncates and darkens.
	    lab[:, :, 0] = np.clip(np.rint(sharpened), 0, 255)
	    return cv2.cvtColor(lab.astype(np.uint8), cv2.COLOR_LAB2BGR)


	def restore_face_gfpgan(
	    image: np.ndarray,
	    upscale: int = 1,
	) -> np.ndarray:
	    """GFPGAN face restore. Returns original if not installed.

	    Args:
	        image: Image handed to ``GFPGANer.enhance``.
	        upscale: Upscale factor passed to ``GFPGANer``.

	    Returns:
	        The restored image, or the *same* ``image`` object when ``gfpgan``
	        cannot be imported, restoration raises, or no result is produced.
	        Callers may rely on that identity to detect a skipped restoration.
	    """
	    try:
	        from gfpgan import GFPGANer
	    except ImportError:
	        return image

	    try:
	        # NOTE: the restorer is built on every call.
	        restorer = GFPGANer(
	            model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
	            upscale=upscale,
	            arch="clean",
	            channel_multiplier=2,
	            bg_upsampler=None,
	        )
	        _, _, restored = restorer.enhance(
	            image,
	            has_aligned=False,
	            only_center_face=True,
	            paste_back=True,
	        )
	    except Exception:
	        # Best-effort step: keep the pipeline alive, but leave a trace
	        # instead of failing silently.
	        logging.getLogger(__name__).warning(
	            "GFPGAN face restoration failed; returning input unchanged",
	            exc_info=True,
	        )
	        return image

	    return image if restored is None else restored


	def restore_face_codeformer(
	    image: np.ndarray,
	    fidelity: float = 0.7,
	    upscale: int = 1,
	) -> np.ndarray:
	    """CodeFormer face restore. fidelity: 0=quality, 1=identity. Returns original if not installed.

	    Args:
	        image: Image handed to ``FaceRestoreHelper.read_image``.
	        fidelity: Passed to the model as ``w``.
	        upscale: Upscale factor passed to ``FaceRestoreHelper``.

	    Returns:
	        The image with restored faces pasted back, or the *same* ``image``
	        object when the dependencies are missing, no face is found, or
	        restoration raises. Callers may rely on that identity to detect a
	        skipped restoration.
	    """
	    try:
	        from codeformer.basicsr.utils import img2tensor, tensor2img
	        from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
	        from codeformer.basicsr.utils.download_util import load_file_from_url
	        import torch
	        from torchvision.transforms.functional import normalize as tv_normalize
	    except ImportError:
	        return image

	    try:
	        # The former unused `set_realesrgan` import is gone: it supplied no
	        # name used here, yet its failure alone would disable restoration.
	        from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch

	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	        # Detect and align faces first so the model is not loaded for an
	        # image that has nothing to restore.
	        face_helper = FaceRestoreHelper(
	            upscale,
	            face_size=512,
	            crop_ratio=(1, 1),
	            det_model="retinaface_resnet50",
	            save_ext="png",
	            device=device,
	        )
	        face_helper.read_image(image)
	        face_helper.get_face_landmarks_5(only_center_face=True)
	        face_helper.align_warp_face()

	        if not face_helper.cropped_faces:
	            # Pasting zero faces would presumably still hand back an array
	            # other than `image`, which identity-comparing callers would
	            # record as a restoration that never happened.
	            # NOTE(review): confirm against FaceRestoreHelper.paste_faces_to_image.
	            return image

	        model = CodeFormerArch(
	            dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
	            connect_list=["32", "64", "128", "256"],
	        ).to(device)

	        ckpt_path = load_file_from_url(
	            url="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
	            model_dir="weights/CodeFormer",
	            progress=True,
	        )
	        # NOTE(security): weights_only=False lets the checkpoint unpickle
	        # arbitrary objects. Only safe while the download source is trusted;
	        # consider weights_only=True if the checkpoint loads that way.
	        checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
	        model.load_state_dict(checkpoint["params_ema"])
	        model.eval()

	        for cropped_face in face_helper.cropped_faces:
	            # Scale to [0, 1], then normalize each channel to [-1, 1].
	            face_t = img2tensor(cropped_face / 255.0, bgr2rgb=True, float32=True)
	            tv_normalize(face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
	            face_t = face_t.unsqueeze(0).to(device)

	            with torch.no_grad():
	                output = model(face_t, w=fidelity, adain=True)[0]
	            restored = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
	            restored = restored.astype(np.uint8)
	            face_helper.add_restored_face(restored)

	        face_helper.get_inverse_affine(None)
	        restored_img = face_helper.paste_faces_to_image()
	    except Exception:
	        # Best-effort step: keep the pipeline alive, but leave a trace
	        # instead of failing silently.
	        logging.getLogger(__name__).warning(
	            "CodeFormer face restoration failed; returning input unchanged",
	            exc_info=True,
	        )
	        return image

	    return image if restored_img is None else restored_img


	def enhance_background_realesrgan(
	    image: np.ndarray,
	    mask: np.ndarray,
	    outscale: int = 2,
	) -> np.ndarray:
	    """Real-ESRGAN on background only (outside mask). Returns original if not installed.

	    The whole image is enhanced at ``outscale``, resized back to the input
	    size, and then mixed with the input so that high-mask pixels keep the
	    original content and low-mask pixels take the enhanced content.

	    Args:
	        image: Image handed to ``RealESRGANer.enhance``.
	        mask: Region to protect. If any value exceeds 1.0 the whole mask is
	            divided by 255. Resized to the image's height/width when they
	            differ.
	        outscale: Output scale passed to ``RealESRGANer.enhance``.

	    Returns:
	        ``uint8`` composite, or the *same* ``image`` object when the
	        dependencies are missing or enhancement raises.
	    """
	    try:
	        from realesrgan import RealESRGANer
	        from basicsr.archs.rrdbnet_arch import RRDBNet
	        import torch
	    except ImportError:
	        return image

	    try:
	        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
	        upsampler = RealESRGANer(
	            scale=4,
	            model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
	            model=model,
	            tile=400,
	            tile_pad=10,
	            pre_pad=0,
	            half=torch.cuda.is_available(),
	        )
	        enhanced, _ = upsampler.enhance(image, outscale=outscale)

	        # Downscale back to original size
	        h, w = image.shape[:2]
	        enhanced = cv2.resize(enhanced, (w, h), interpolation=cv2.INTER_LANCZOS4)

	        # Normalize the mask to float32 weights in [0, 1] at image size.
	        mask_f = mask.astype(np.float32)
	        if mask_f.max() > 1.0:
	            mask_f /= 255.0
	        if mask_f.shape[:2] != (h, w):
	            mask_f = cv2.resize(mask_f, (w, h))
	        if mask_f.ndim == 2:
	            # Trailing axis lets the weights broadcast over the channels.
	            mask_f = mask_f[:, :, None]

	        # Keep face region from original, use enhanced for background
	        mixed = (
	            image.astype(np.float32) * mask_f
	            + enhanced.astype(np.float32) * (1.0 - mask_f)
	        )
	        # Round before the uint8 cast; a bare cast truncates.
	        return np.clip(np.rint(mixed), 0, 255).astype(np.uint8)
	    except Exception:
	        # Best-effort step: keep the pipeline alive, but leave a trace
	        # instead of failing silently.
	        logging.getLogger(__name__).warning(
	            "Real-ESRGAN background enhancement failed; returning input unchanged",
	            exc_info=True,
	        )

	    return image


	def verify_identity_arcface(
	    original: np.ndarray,
	    result: np.ndarray,
	    threshold: float = 0.6,
	) -> dict:
	    """Compare face embeddings of two images by cosine similarity.

	    Uses the first face InsightFace reports in each image. The similarity is
	    clipped to [0, 1] and the check passes when it is at least ``threshold``.

	    Returns:
	        Dict with ``similarity``, ``passed`` and ``message``. Whenever the
	        check cannot run (InsightFace missing, no face found, or any
	        exception) ``similarity`` is -1.0 and ``passed`` is True.
	    """

	    def _not_run(message: str) -> dict:
	        # Shared shape of every "could not check" outcome.
	        return {"similarity": -1.0, "passed": True, "message": message}

	    try:
	        from insightface.app import FaceAnalysis
	    except ImportError:
	        return _not_run("InsightFace not installed - identity check skipped")

	    try:
	        analyzer = FaceAnalysis(
	            name="buffalo_l",
	            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
	        )
	        device_id = 0 if _has_cuda() else -1
	        analyzer.prepare(ctx_id=device_id, det_size=(320, 320))

	        faces_before = analyzer.get(original)
	        faces_after = analyzer.get(result)
	        if not (faces_before and faces_after):
	            return _not_run("Could not detect face in one/both images - check skipped")

	        emb_before = faces_before[0].embedding
	        emb_after = faces_after[0].embedding

	        # The small epsilon keeps the division finite for zero-norm vectors.
	        denominator = np.linalg.norm(emb_before) * np.linalg.norm(emb_after) + 1e-8
	        cosine = float(np.dot(emb_before, emb_after) / denominator)
	        cosine = float(np.clip(cosine, 0, 1))

	        ok = cosine >= threshold
	        message = (
	            f"Identity preserved (similarity={cosine:.3f})"
	            if ok
	            else f"WARNING: Identity drift detected (similarity={cosine:.3f} < {threshold})"
	        )
	        return {"similarity": cosine, "passed": ok, "message": message}
	    except Exception as exc:
	        return _not_run(f"Identity check failed: {exc}")


	def _has_cuda() -> bool:
	    """Return ``torch.cuda.is_available()``, or False when torch cannot be imported."""
	    try:
	        from torch import cuda

	        available = cuda.is_available()
	    except ImportError:
	        available = False
	    return available


	def histogram_match_skin(
	    source: np.ndarray,
	    reference: np.ndarray,
	    mask: np.ndarray,
	) -> np.ndarray:
	    """CDF-based histogram matching in LAB space. Better than mean/std for skin.

	    Inside the masked region, each LAB channel of ``source`` is remapped so
	    its sorted values line up with the sorted values of ``reference`` in the
	    same region. Pixels outside the mask are left as they are.

	    Args:
	        source: BGR image to adjust.
	        reference: BGR image supplying the target distribution. Resized to
	            the source's height/width when they differ.
	        mask: Region to match, any numeric or bool dtype, 2-D or with a
	            channel axis (reduced with a per-pixel max). If any value exceeds
	            1.0 the mask is divided by 255; pixels above 0.3 are selected.
	            Resized to the source's height/width when they differ.

	    Returns:
	        Adjusted BGR ``uint8`` image, or the *same* ``source`` object when
	        the mask selects no pixel.
	    """
	    # Bring every mask flavour (uint8 0-255, float 0-1, bool, multi-channel)
	    # to one 2-D float representation before thresholding.
	    mask_f = np.asarray(mask, dtype=np.float32)
	    if mask_f.ndim == 3:
	        mask_f = mask_f.max(axis=2)
	    if mask_f.size and mask_f.max() > 1.0:
	        mask_f = mask_f / 255.0

	    h, w = source.shape[:2]
	    if mask_f.size and mask_f.shape[:2] != (h, w):
	        mask_f = cv2.resize(mask_f, (w, h))

	    # 0.3 on the normalized scale reproduces the former `> 76` cut for
	    # 0-255 uint8 masks (76/255 < 0.3 < 77/255).
	    mask_bool = mask_f > 0.3
	    if not mask_bool.any():
	        return source

	    if reference.shape[:2] != (h, w):
	        reference = cv2.resize(reference, (w, h))

	    src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
	    ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)

	    for ch in range(3):
	        src_plane = src_lab[:, :, ch]  # view: writes land in src_lab
	        src_vals = src_plane[mask_bool]

	        # Both samples come from the same mask, so they have equal length
	        # and the k-th smallest source value maps straight to the k-th
	        # smallest reference value.
	        src_sorted = np.sort(src_vals)
	        ref_sorted = np.sort(ref_lab[:, :, ch][mask_bool])

	        # Only the masked pixels need remapping.
	        src_plane[mask_bool] = np.interp(src_vals, src_sorted, ref_sorted)

	    # Round before the uint8 cast; a bare cast truncates.
	    result_lab = np.clip(np.rint(src_lab), 0, 255).astype(np.uint8)
	    return cv2.cvtColor(result_lab, cv2.COLOR_LAB2BGR)


	def full_postprocess(
	    generated: np.ndarray,
	    original: np.ndarray,
	    mask: np.ndarray,
	    restore_mode: str = "codeformer",
	    codeformer_fidelity: float = 0.7,
	    use_realesrgan: bool = True,
	    use_laplacian_blend: bool = True,
	    sharpen_strength: float = 0.25,
	    verify_identity: bool = True,
	    identity_threshold: float = 0.6,
	) -> dict:
	    """Full pipeline: restore -> bg enhance -> histogram match -> sharpen -> blend -> identity check.

	    Args:
	        generated: Image to post-process; it is copied, not modified.
	        original: Image the result is colour-matched to and blended into.
	        mask: Region of ``generated`` to keep in the final composite.
	        restore_mode: ``"codeformer"`` (falls back to GFPGAN when CodeFormer
	            returns its input untouched), ``"gfpgan"``, or any other value
	            to skip face restoration.
	        codeformer_fidelity: Forwarded to ``restore_face_codeformer``.
	        use_realesrgan: Run ``enhance_background_realesrgan`` when True.
	        use_laplacian_blend: Use ``laplacian_pyramid_blend`` when True,
	            otherwise a plain per-pixel alpha blend.
	        sharpen_strength: Forwarded to ``frequency_aware_sharpen``; values
	            ``<= 0`` skip sharpening.
	        verify_identity: Run ``verify_identity_arcface`` when True.
	        identity_threshold: Forwarded to ``verify_identity_arcface``.

	    Returns:
	        Dict with ``image`` (the composite), ``identity_check`` (dict with
	        ``similarity``, ``passed``, ``message``) and ``restore_used``
	        (``"codeformer"``, ``"gfpgan"`` or ``"none"``).
	    """
	    result = generated.copy()
	    restore_used = "none"

	    # Step 1: Neural face restoration (CodeFormer > GFPGAN > skip)
	    if restore_mode == "codeformer":
	        restorers = (
	            ("codeformer", lambda img: restore_face_codeformer(img, fidelity=codeformer_fidelity)),
	            ("gfpgan", restore_face_gfpgan),
	        )
	    elif restore_mode == "gfpgan":
	        restorers = (("gfpgan", restore_face_gfpgan),)
	    else:
	        restorers = ()

	    for name, restore in restorers:
	        restored = restore(result)
	        # A restorer that did nothing hands back the very object it was
	        # given, so compare against that input (not against `generated`,
	        # which `result` never is after the copy above).
	        if restored is not result:
	            result = restored
	            restore_used = name
	            break

	    # Step 2: Neural background enhancement
	    if use_realesrgan:
	        result = enhance_background_realesrgan(result, mask)

	    # Step 3: Skin tone histogram matching (classical)
	    result = histogram_match_skin(result, original, mask)

	    # Step 4: Sharpen texture (classical)
	    if sharpen_strength > 0:
	        result = frequency_aware_sharpen(result, strength=sharpen_strength)

	    # Step 5: Blend into original (classical)
	    if use_laplacian_blend:
	        composited = laplacian_pyramid_blend(result, original, mask)
	    else:
	        # Same size reconciliation the pyramid blend performs on its inputs.
	        h, w = original.shape[:2]
	        if result.shape[:2] != (h, w):
	            result = cv2.resize(result, (w, h))

	        mask_f = mask.astype(np.float32)
	        if mask_f.max() > 1.0:
	            mask_f /= 255.0
	        if mask_f.shape[:2] != (h, w):
	            mask_f = cv2.resize(mask_f, (w, h))
	        if mask_f.ndim == 2:
	            # Trailing axis lets the weights broadcast over the channels.
	            mask_f = mask_f[:, :, None]

	        mixed = (
	            result.astype(np.float32) * mask_f
	            + original.astype(np.float32) * (1.0 - mask_f)
	        )
	        # Round before the uint8 cast; a bare cast truncates.
	        composited = np.clip(np.rint(mixed), 0, 255).astype(np.uint8)

	    # Step 6: Neural identity verification
	    identity_check = {"similarity": -1.0, "passed": True, "message": "skipped"}
	    if verify_identity:
	        identity_check = verify_identity_arcface(
	            original, composited, threshold=identity_threshold,
	        )

	    return {
	        "image": composited,
	        "identity_check": identity_check,
	        "restore_used": restore_used,
	    }