"""
utility functions and classes to handle feature extraction and model loading
"""

import os
import os.path as osp
import torch
from collections import OrderedDict
import numpy as np
from scipy.spatial import ConvexHull
from typing import Union
import cv2

from ..modules.spade_generator import SPADEDecoder
from ..modules.warping_network import WarpingNetwork
from ..modules.motion_extractor import MotionExtractor
from ..modules.appearance_feature_extractor import AppearanceFeatureExtractor
from ..modules.stitching_retargeting_network import StitchingRetargetingNetwork


def tensor_to_numpy(data: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
    """transform torch.Tensor into numpy.ndarray"""
    if isinstance(data, torch.Tensor):
        return data.data.cpu().numpy()
    return data


def calc_motion_multiplier(
    kp_source: Union[np.ndarray, torch.Tensor],
    kp_driving_initial: Union[np.ndarray, torch.Tensor]
) -> float:
    """calculate motion_multiplier based on the source image and the first driving frame"""
    kp_source_np = tensor_to_numpy(kp_source)
    kp_driving_initial_np = tensor_to_numpy(kp_driving_initial)

    source_area = ConvexHull(kp_source_np.squeeze(0)).volume
    driving_area = ConvexHull(kp_driving_initial_np.squeeze(0)).volume
    motion_multiplier = np.sqrt(source_area) / np.sqrt(driving_area)

    return motion_multiplier
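
# Usage sketch (illustrative, not from the original code; the keypoint count 21 and
# random values are placeholders -- any (1, K, 3) array works):
#   kp_src = np.random.rand(1, 21, 3)
#   kp_drv_0 = np.random.rand(1, 21, 3)
#   multiplier = calc_motion_multiplier(kp_src, kp_drv_0)
#   # == np.sqrt(ConvexHull(kp_src[0]).volume) / np.sqrt(ConvexHull(kp_drv_0[0]).volume)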


def suffix(filename):
    """a.jpg -> jpg"""
    pos = filename.rfind(".")
    if pos == -1:
        return ""
    return filename[pos + 1:]


def prefix(filename):
    """a.jpg -> a"""
    pos = filename.rfind(".")
    if pos == -1:
        return filename
    return filename[:pos]


def basename(filename):
    """a/b/c.jpg -> c"""
    return prefix(osp.basename(filename))


def remove_suffix(filepath):
    """a/b/c.jpg -> a/b/c"""
    return osp.join(osp.dirname(filepath), basename(filepath))


def is_image(file_path):
    """Return True if file_path has a common image extension."""
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    return file_path.lower().endswith(image_extensions)


def is_video(file_path):
    """Return True if file_path has a common video extension or is a directory."""
    return file_path.lower().endswith((".mp4", ".mov", ".avi", ".webm")) or osp.isdir(file_path)


def is_template(file_path):
    """Return True if file_path is a pickled template (.pkl)."""
    return file_path.endswith(".pkl")


def mkdir(d, log=False):
    """Create directory d (including parents) if it does not exist; return d."""
    if not osp.exists(d):
        os.makedirs(d, exist_ok=True)
        if log:
            print(f"Make dir: {d}")
    return d


def squeeze_tensor_to_numpy(tensor):
    """Drop the batch dimension and convert a torch.Tensor to a numpy.ndarray."""
    out = tensor.data.squeeze(0).cpu().numpy()
    return out


def dct2device(dct: dict, device):
    """Move every value of dct to device, wrapping non-tensor values as tensors; modifies dct in place."""
    for key in dct:
        if isinstance(dct[key], torch.Tensor):
            dct[key] = dct[key].to(device)
        else:
            dct[key] = torch.tensor(dct[key]).to(device)
    return dct
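
# Usage sketch (keys, shapes, and the device string are placeholders, not the repo's
# actual motion dict):
#   kp_info = {'kp': torch.randn(1, 21, 3), 'scale': 1.2}
#   kp_info = dct2device(kp_info, 'cuda:0')  # tensors moved; plain numbers wrapped via torch.tensor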


def concat_feat(kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
    """
    kp_source: (bs, k, 3)
    kp_driving: (bs, k, 3)
    Return: (bs, 2k*3)
    """
    bs_src = kp_source.shape[0]
    bs_dri = kp_driving.shape[0]
    assert bs_src == bs_dri, 'batch size must be equal'

    feat = torch.cat([kp_source.view(bs_src, -1), kp_driving.view(bs_dri, -1)], dim=1)
    return feat
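
# Shape sketch (bs=1 and k=21 are arbitrary example values):
#   feat = concat_feat(torch.randn(1, 21, 3), torch.randn(1, 21, 3))
#   feat.shape  # torch.Size([1, 126]) == (bs, 2*k*3)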


def remove_ddp_dumplicate_key(state_dict):
    """Strip the 'module.' prefix that DistributedDataParallel adds to state_dict keys."""
    state_dict_new = OrderedDict()
    for key in state_dict.keys():
        state_dict_new[key.replace('module.', '')] = state_dict[key]
    return state_dict_new
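
# Effect sketch (hypothetical keys): a DistributedDataParallel checkpoint such as
#   {'module.conv.weight': w, 'module.conv.bias': b}
# becomes
#   {'conv.weight': w, 'conv.bias': b}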


def load_model(ckpt_path, model_config, device, model_type):
    """Build the sub-model named by model_type from model_config and load its weights from ckpt_path."""
    model_params = model_config['model_params'][f'{model_type}_params']

    if model_type == 'appearance_feature_extractor':
        model = AppearanceFeatureExtractor(**model_params).to(device)
    elif model_type == 'motion_extractor':
        model = MotionExtractor(**model_params).to(device)
    elif model_type == 'warping_module':
        model = WarpingNetwork(**model_params).to(device)
    elif model_type == 'spade_generator':
        model = SPADEDecoder(**model_params).to(device)
    elif model_type == 'stitching_retargeting_module':
        # The stitching/retargeting checkpoint bundles three sub-networks
        # (stitching, lip, eye), so it is loaded and returned as a dict.
        config = model_config['model_params']['stitching_retargeting_module_params']
        checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)

        stitcher = StitchingRetargetingNetwork(**config.get('stitching'))
        stitcher.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_shoulder']))
        stitcher = stitcher.to(device)
        stitcher.eval()

        retargetor_lip = StitchingRetargetingNetwork(**config.get('lip'))
        retargetor_lip.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_mouth']))
        retargetor_lip = retargetor_lip.to(device)
        retargetor_lip.eval()

        retargetor_eye = StitchingRetargetingNetwork(**config.get('eye'))
        retargetor_eye.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_eye']))
        retargetor_eye = retargetor_eye.to(device)
        retargetor_eye.eval()

        return {
            'stitching': stitcher,
            'lip': retargetor_lip,
            'eye': retargetor_eye
        }
    else:
        raise ValueError(f"Unknown model type: {model_type}")

    model.load_state_dict(torch.load(ckpt_path, map_location=lambda storage, loc: storage))
    model.eval()
    return model
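
# Usage sketch (the YAML path, weight path, and config layout below are assumptions
# for illustration only):
#   import yaml
#   with open('configs/models.yaml') as f:
#       model_config = yaml.safe_load(f)
#   motion_extractor = load_model('weights/motion_extractor.pth', model_config,
#                                 'cuda:0', 'motion_extractor')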


def load_description(fp):
    """Read a UTF-8 text file and return its content as a string."""
    with open(fp, 'r', encoding='utf-8') as f:
        content = f.read()
    return content


def is_square_video(video_path):
    """Return True if the video at video_path has equal frame width and height."""
    video = cv2.VideoCapture(video_path)

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    video.release()

    return width == height


def clean_state_dict(state_dict):
    """Remove a leading 'module.' prefix (added by DataParallel/DDP) from every state_dict key."""
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k[:7] == 'module.':
            k = k[7:]
        new_state_dict[k] = v
    return new_state_dict