)
+
+ frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5)
+
+ # Mel and Linear spectrograms normalization/scaling and clipping
+ signal_normalization=True,
+ # Whether to normalize mel spectrograms to some predefined range (following below parameters)
+ allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True
+ symmetric_mels=True,
+ # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2,
+ # faster and cleaner convergence)
+ max_abs_value=4.,
+ # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not
+ # be too big to avoid gradient explosion,
+ # not too small for fast convergence)
+ # Contribution by @begeekmyfriend
+ # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude
+ # levels. Also allows for better G&L phase reconstruction)
+ preemphasize=True, # whether to apply filter
+ preemphasis=0.97, # filter coefficient.
+
+ # Limits
+ min_level_db=-100,
+ ref_level_db=20,
+ fmin=55,
+ # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To
+ # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525])
+ fmax=7600, # To be increased/reduced depending on data.
+
+ ###################### Our training parameters #################################
+ img_size=96,
+ fps=25,
+
+ batch_size=2,
+ initial_learning_rate=1e-3,
+ nepochs=100000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs
+ num_workers=0,
+ checkpoint_interval=10000,
+ eval_interval=10,
+ writer_interval=5,
+ save_optimizer_state=True,
+
+ syncnet_wt=0.0, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence.
+ syncnet_batch_size=64,
+ syncnet_lr=1e-4,
+ syncnet_eval_interval=10000,
+ syncnet_checkpoint_interval=10000,
+
+ disc_wt=0.07,
+ disc_initial_learning_rate=1e-4,
+)
+
+
+def hparams_debug_string():
+    """Return a multi-line summary of the hyperparameters, one ``name: value`` per line.
+
+    Entries are listed in sorted name order; the ``"sentences"`` entry is left out.
+    """
+    values = hparams.values()
+    lines = []
+    for key in sorted(values):
+        if key == "sentences":
+            continue
+        lines.append(" %s: %s" % (key, values[key]))
+    return "Hyperparameters:\n" + "\n".join(lines)
diff --git a/scripts/utils/init_path.py b/scripts/utils/init_path.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f38d11907bd0dc789992062ce7f02d8876c638f
--- /dev/null
+++ b/scripts/utils/init_path.py
@@ -0,0 +1,47 @@
+import os
+import glob
+
+def _legacy_checkpoint_paths(checkpoint_dir):
+    """Return the paths of the per-model (non-safetensors) checkpoints."""
+    return {
+        'wav2lip_checkpoint': os.path.join(checkpoint_dir, 'wav2lip.pth'),
+        'audio2pose_checkpoint': os.path.join(checkpoint_dir, 'auido2pose_00140-model.pth'),
+        'audio2exp_checkpoint': os.path.join(checkpoint_dir, 'auido2exp_00300-model.pth'),
+        'free_view_checkpoint': os.path.join(checkpoint_dir, 'facevid2vid_00189-model.pth.tar'),
+        'path_of_net_recon_model': os.path.join(checkpoint_dir, 'epoch_20.pth'),
+    }
+
+
+def init_path(checkpoint_dir, config_dir, size=512, old_version=False, preprocess='crop'):
+    """Collect the checkpoint and config file paths used by the pipeline.
+
+    Args:
+        checkpoint_dir: Directory that holds the model checkpoints.
+        config_dir: Directory that holds the YAML configs; it is also stored
+            as ``'dir_of_BFM_fitting'``.
+        size: Resolution tag embedded in the safetensors file name.
+        old_version: When true, always use the per-model legacy checkpoints.
+        preprocess: Preprocess mode. Any value containing ``'full'`` selects
+            the ``mapping_00109`` checkpoint and ``facerender_still.yaml``;
+            everything else selects ``mapping_00229`` and ``facerender.yaml``.
+
+    Returns:
+        dict: Path entries plus a boolean ``'use_safetensor'`` flag. With
+        safetensors the dict has a single ``'checkpoint'`` model entry;
+        otherwise it has the five legacy checkpoint entries.
+    """
+    # The glob is only evaluated when the caller did not force the old layout.
+    if not old_version and glob.glob(os.path.join(checkpoint_dir, '*.safetensors')):
+        print('using safetensor as default')
+        sadtalker_paths = {
+            "checkpoint": os.path.join(checkpoint_dir, 'SadTalker_V0.0.2_'+str(size)+'.safetensors'),
+        }
+        use_safetensor = True
+    else:
+        if not old_version:
+            # No safetensors file was found, so silently falling back would be surprising.
+            print("WARNING: The new version of the model will be updated by safetensor, you may need to download it mannully. We run the old version of the checkpoint this time!")
+        sadtalker_paths = _legacy_checkpoint_paths(checkpoint_dir)
+        use_safetensor = False
+
+    sadtalker_paths['dir_of_BFM_fitting'] = os.path.join(config_dir)  # , 'BFM_Fitting'
+    sadtalker_paths['audio2pose_yaml_path'] = os.path.join(config_dir, 'auido2pose.yaml')
+    sadtalker_paths['audio2exp_yaml_path'] = os.path.join(config_dir, 'auido2exp.yaml')
+    sadtalker_paths['use_safetensor'] = use_safetensor
+
+    if 'full' in preprocess:
+        sadtalker_paths['mappingnet_checkpoint'] = os.path.join(checkpoint_dir, 'mapping_00109-model.pth.tar')
+        sadtalker_paths['facerender_yaml'] = os.path.join(config_dir, 'facerender_still.yaml')
+    else:
+        sadtalker_paths['mappingnet_checkpoint'] = os.path.join(checkpoint_dir, 'mapping_00229-model.pth.tar')
+        sadtalker_paths['facerender_yaml'] = os.path.join(config_dir, 'facerender.yaml')
+
+    return sadtalker_paths
\ No newline at end of file
diff --git a/scripts/utils/model2safetensor.py b/scripts/utils/model2safetensor.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee5fd4d9fd0c816039e4153463d0799e80af3bec
--- /dev/null
+++ b/scripts/utils/model2safetensor.py
@@ -0,0 +1,141 @@
+import torch
+import yaml
+import os
+
+import safetensors
+from safetensors.torch import save_file
+from yacs.config import CfgNode as CN
+import sys
+
+sys.path.append('/apdcephfs/private_shadowcun/SadTalker')
+
+from scripts.face3d.models import networks
+
+from scripts.facerender.modules.keypoint_detector import HEEstimator, KPDetector
+from scripts.facerender.modules.mapping import MappingNet
+from scripts.facerender.modules.generator import OcclusionAwareGenerator, OcclusionAwareSPADEGenerator
+
+from scripts.audio2pose_models.audio2pose import Audio2Pose
+from scripts.audio2exp_models.networks import SimpleWrapperV2
+from scripts.test_audio2coeff import load_cpk
+
+# Resolution tag; it is interpolated into the checkpoint file names used further down.
+size = 256
+############ face vid2vid
+# Both paths below are relative, so the script has to be run from the repository root.
+config_path = os.path.join('src', 'config', 'facerender.yaml')
+current_root_path = '.'
+
+# Build the 3D face reconstruction network and load its weights on CPU.
+path_of_net_recon_model = os.path.join(current_root_path, 'checkpoints', 'epoch_20.pth')
+net_recon = networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='')
+checkpoint = torch.load(path_of_net_recon_model, map_location='cpu')
+net_recon.load_state_dict(checkpoint['net_recon'])
+
+# The facerender YAML supplies the constructor arguments for the modules below.
+with open(config_path) as f:
+ config = yaml.safe_load(f)
+
+# Instantiate the face-render modules; their weights are loaded later in the script.
+generator = OcclusionAwareSPADEGenerator(**config['model_params']['generator_params'],
+ **config['model_params']['common_params'])
+kp_extractor = KPDetector(**config['model_params']['kp_detector_params'],
+ **config['model_params']['common_params'])
+he_estimator = HEEstimator(**config['model_params']['he_estimator_params'],
+ **config['model_params']['common_params'])
+mapping = MappingNet(**config['model_params']['mapping_params'])
+
+def load_cpk_facevid2vid(checkpoint_path, generator=None, discriminator=None,
+                         kp_detector=None, he_estimator=None, optimizer_generator=None,
+                         optimizer_discriminator=None, optimizer_kp_detector=None,
+                         optimizer_he_estimator=None, device="cpu"):
+    """Restore face-vid2vid modules and optimizers from a ``torch.save`` checkpoint.
+
+    Every argument left as ``None`` is skipped. The discriminator and its
+    optimizer are optional in the checkpoint: if their entry is missing or
+    cannot be loaded, a message is printed and loading continues.
+
+    Args:
+        checkpoint_path: Path of the checkpoint file.
+        generator, discriminator, kp_detector, he_estimator: Objects exposing
+            ``load_state_dict``; filled from the checkpoint entry of the same name.
+        optimizer_generator, optimizer_discriminator, optimizer_kp_detector,
+            optimizer_he_estimator: Optimizers filled from the ``optimizer_*`` entries.
+        device: Device passed to ``torch.load`` as ``map_location``.
+
+    Returns:
+        The value stored under ``'epoch'`` in the checkpoint.
+
+    Raises:
+        KeyError: If a requested mandatory entry (or ``'epoch'``) is missing.
+    """
+    checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
+    if generator is not None:
+        generator.load_state_dict(checkpoint['generator'])
+    if kp_detector is not None:
+        kp_detector.load_state_dict(checkpoint['kp_detector'])
+    if he_estimator is not None:
+        he_estimator.load_state_dict(checkpoint['he_estimator'])
+    if discriminator is not None:
+        try:
+            discriminator.load_state_dict(checkpoint['discriminator'])
+        except (KeyError, RuntimeError):
+            # KeyError: entry absent; RuntimeError: state-dict does not fit the module.
+            print ('No discriminator in the state-dict. Dicriminator will be randomly initialized')
+    if optimizer_generator is not None:
+        optimizer_generator.load_state_dict(checkpoint['optimizer_generator'])
+    if optimizer_discriminator is not None:
+        try:
+            optimizer_discriminator.load_state_dict(checkpoint['optimizer_discriminator'])
+        except (KeyError, RuntimeError, ValueError):
+            # A missing entry raises KeyError, which the message below describes;
+            # catching only RuntimeError let that case crash the load.
+            print ('No discriminator optimizer in the state-dict. Optimizer will be not initialized')
+    if optimizer_kp_detector is not None:
+        optimizer_kp_detector.load_state_dict(checkpoint['optimizer_kp_detector'])
+    if optimizer_he_estimator is not None:
+        optimizer_he_estimator.load_state_dict(checkpoint['optimizer_he_estimator'])
+
+    return checkpoint['epoch']
+
+
+def load_cpk_facevid2vid_safetensor(checkpoint_path, generator=None,
+                                    kp_detector=None, he_estimator=None,
+                                    device="cpu"):
+    """Restore face-render modules from a combined safetensors checkpoint.
+
+    The checkpoint is a flat mapping whose keys start with the name of the
+    sub-module they belong to. For each module that is not ``None``, the
+    entries under its prefix are selected, the prefix is stripped, and the
+    result is passed to ``load_state_dict``.
+
+    Args:
+        checkpoint_path: Path of the ``.safetensors`` file.
+        generator: Module filled from the ``generator.`` entries.
+        kp_detector: Module filled from the ``kp_extractor.`` entries.
+        he_estimator: Module filled from the ``he_estimator.`` entries.
+        device: Device the tensors are loaded onto.
+
+    Returns:
+        None
+    """
+    checkpoint = safetensors.torch.load_file(checkpoint_path, device=device)
+
+    def sub_state_dict(name):
+        # Match on the leading "<name>." only and strip it once, so keys of other
+        # sub-modules that merely contain the name are neither included nor mangled.
+        prefix = name + '.'
+        return {k[len(prefix):]: v for k, v in checkpoint.items() if k.startswith(prefix)}
+
+    if generator is not None:
+        generator.load_state_dict(sub_state_dict('generator'))
+    if kp_detector is not None:
+        kp_detector.load_state_dict(sub_state_dict('kp_extractor'))
+    if he_estimator is not None:
+        he_estimator.load_state_dict(sub_state_dict('he_estimator'))
+
+    return None
+
+# Load the face-vid2vid weights into the modules built from the facerender config.
+# NOTE(review): this absolute path is machine-specific, unlike the paths below that are
+# built from current_root_path; adjust it before running the script elsewhere.
+free_view_checkpoint = '/apdcephfs/private_shadowcun/SadTalker/checkpoints/facevid2vid_'+str(size)+'-model.pth.tar'
+load_cpk_facevid2vid(free_view_checkpoint, kp_detector=kp_extractor, generator=generator, he_estimator=he_estimator)
+
+wav2lip_checkpoint = os.path.join(current_root_path, 'checkpoints', 'wav2lip.pth')
+
+audio2pose_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2pose_00140-model.pth')
+audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')
+
+audio2exp_checkpoint = os.path.join(current_root_path, 'checkpoints', 'auido2exp_00300-model.pth')
+audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')
+
+# load audio2pose_model
+# The config file is closed as soon as it has been parsed instead of leaking the handle.
+with open(audio2pose_yaml_path) as fcfg_pose:
+    cfg_pose = CN.load_cfg(fcfg_pose)
+cfg_pose.freeze()
+audio2pose_model = Audio2Pose(cfg_pose, wav2lip_checkpoint)
+audio2pose_model.eval()
+load_cpk(audio2pose_checkpoint, model=audio2pose_model, device='cpu')
+
+# load audio2exp_model
+netG = SimpleWrapperV2()
+netG.eval()
+load_cpk(audio2exp_checkpoint, model=netG, device='cpu')
+
+class SadTalker(torch.nn.Module):
+    """Container module that holds the sub-networks under fixed attribute names.
+
+    It defines no ``forward``; it exists so that one ``state_dict()`` call yields
+    all weights, keyed by the attribute name of the sub-network they belong to.
+    """
+
+    def __init__(self, kp_extractor, generator, netG, audio2pose, face_3drecon):
+        super().__init__()
+        # Registration order is kept as-is because it fixes the state_dict key order.
+        children = (
+            ('kp_extractor', kp_extractor),
+            ('generator', generator),
+            ('audio2exp', netG),
+            ('audio2pose', audio2pose),
+            ('face_3drecon', face_3drecon),
+        )
+        for attr_name, module in children:
+            setattr(self, attr_name, module)
+
+
+# Bundle the loaded sub-networks so that a single state_dict covers all of them.
+# Note that `mapping` and `he_estimator` built earlier are not passed in here, so
+# their weights are not part of the saved file.
+model = SadTalker(kp_extractor, generator, netG, audio2pose_model, net_recon)
+
+# here, we want to convert it to safetensor
+# The output path is relative to the current working directory.
+save_file(model.state_dict(), "checkpoints/SadTalker_V0.0.2_"+str(size)+".safetensors")
+
+### test
+# Round-trip check: reload the file just written into the keypoint detector and generator.
+load_cpk_facevid2vid_safetensor('checkpoints/SadTalker_V0.0.2_'+str(size)+'.safetensors', kp_detector=kp_extractor, generator=generator, he_estimator=None)
\ No newline at end of file
diff --git a/scripts/utils/paste_pic.py b/scripts/utils/paste_pic.py
new file mode 100644
index 0000000000000000000000000000000000000000..b733f1b2fb727c061a4d96f9c0c87364abd677a7
--- /dev/null
+++ b/scripts/utils/paste_pic.py
@@ -0,0 +1,69 @@
+import cv2, os
+import numpy as np
+from tqdm import tqdm
+import uuid
+
+from scripts.utils.videoio import save_video_with_watermark
+
+def paste_pic(video_path, pic_path, crop_info, new_audio_path, full_video_path, extended_crop=False):
+    """Paste a generated face video back onto the full-size source picture.
+
+    Each frame of ``video_path`` is resized to the crop rectangle described by
+    ``crop_info`` and blended onto the source image with ``cv2.seamlessClone``.
+    The blended frames go to a temporary ``.mp4`` which is then handed to
+    ``save_video_with_watermark`` together with ``new_audio_path``.
+
+    Args:
+        video_path: Path of the generated face video.
+        pic_path: Path of the source image, or of a video whose first frame is used.
+        crop_info: Sequence of three items; item 1 is unpacked as
+            ``(clx, cly, crx, cry)`` and item 2 as ``(lx, ly, rx, ry)``.
+            If it does not have three items, a message is printed and nothing is written.
+        new_audio_path: Audio file passed on to ``save_video_with_watermark``.
+        full_video_path: Destination path passed on to ``save_video_with_watermark``.
+        extended_crop: Use the item-1 box directly instead of offsetting it by item 2.
+
+    Raises:
+        ValueError: If ``pic_path`` is not a file, or no image can be read from it.
+    """
+    if not os.path.isfile(pic_path):
+        raise ValueError('pic_path must be a valid path to video/image file')
+    elif pic_path.split('.')[-1] in ['jpg', 'png', 'jpeg']:
+        # loader for first frame
+        full_img = cv2.imread(pic_path)
+    else:
+        # loader for videos: only the first frame is needed
+        video_stream = cv2.VideoCapture(pic_path)
+        try:
+            still_reading, frame = video_stream.read()
+        finally:
+            # Release on every path; previously a successful read left the capture open.
+            video_stream.release()
+        full_img = frame if still_reading else None
+
+    if full_img is None:
+        # Fail with a clear message rather than an AttributeError on `.shape` below.
+        raise ValueError('could not read an image from pic_path: %s' % pic_path)
+    frame_h, frame_w = full_img.shape[:2]
+
+    # Checked before decoding the video so that no work is wasted when nothing can be pasted.
+    if len(crop_info) != 3:
+        print("you didn't crop the image")
+        return
+
+    clx, cly, crx, cry = crop_info[1]
+    lx, ly, rx, ry = crop_info[2]
+    lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry)
+
+    if extended_crop:
+        oy1, oy2, ox1, ox2 = cly, cry, clx, crx
+    else:
+        oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx
+
+    video_stream = cv2.VideoCapture(video_path)
+    fps = video_stream.get(cv2.CAP_PROP_FPS)
+    crop_frames = []
+    try:
+        while True:
+            still_reading, frame = video_stream.read()
+            if not still_reading:
+                break
+            crop_frames.append(frame)
+    finally:
+        video_stream.release()
+
+    tmp_path = str(uuid.uuid4())+'.mp4'
+    out_tmp = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_w, frame_h))
+    try:
+        try:
+            for crop_frame in tqdm(crop_frames, 'seamlessClone:'):
+                p = cv2.resize(crop_frame.astype(np.uint8), (ox2-ox1, oy2 - oy1))
+
+                mask = 255*np.ones(p.shape, p.dtype)
+                location = ((ox1+ox2) // 2, (oy1+oy2) // 2)
+                gen_img = cv2.seamlessClone(p, full_img, mask, location, cv2.NORMAL_CLONE)
+                out_tmp.write(gen_img)
+        finally:
+            out_tmp.release()
+
+        save_video_with_watermark(tmp_path, new_audio_path, full_video_path, watermark=False)
+    finally:
+        # Do not leave the intermediate file behind, even when blending or muxing fails.
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
diff --git a/scripts/utils/preprocess.py b/scripts/utils/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a297e385f9fecdc4c5c360d646be0d62c532540
--- /dev/null
+++ b/scripts/utils/preprocess.py
@@ -0,0 +1,170 @@
+import numpy as np
+import cv2, os, sys, torch
+from tqdm import tqdm
+from PIL import Image
+
+# 3dmm extraction
+import safetensors
+import safetensors.torch
+from scripts.face3d.util.preprocess import align_img
+from scripts.face3d.util.load_mats import load_lm3d
+from scripts.face3d.models import networks
+
+from scipy.io import loadmat, savemat
+from scripts.utils.croper import Preprocesser
+
+
+import warnings
+
+from scripts.utils.safetensor_helper import load_x_from_safetensor
+warnings.filterwarnings("ignore")
+
+def split_coeff(coeffs):
+    """
+    Slice a batch of coefficient vectors into named groups.
+
+    Parameters:
+        coeffs -- torch.tensor indexed as (batch, coefficient)
+
+    Return:
+        coeffs_dict -- a dict of column slices of `coeffs`:
+            'id' 0-79, 'exp' 80-143, 'tex' 144-223, 'angle' 224-226,
+            'gamma' 227-253, 'trans' 254 onward
+    """
+    column_ranges = (
+        ('id', 0, 80),
+        ('exp', 80, 144),
+        ('tex', 144, 224),
+        ('angle', 224, 227),
+        ('gamma', 227, 254),
+        ('trans', 254, None),
+    )
+    return {name: coeffs[:, start:stop] for name, start, stop in column_ranges}
+
+
+class CropAndExtract():
+    """Crop the face from an image or video and extract 3DMM coefficients per frame."""
+
+    def __init__(self, sadtalker_path, device):
+        """Create the face cropper and load the 3D reconstruction network.
+
+        Args:
+            sadtalker_path: Mapping with ``'use_safetensor'`` and
+                ``'dir_of_BFM_fitting'``, plus ``'checkpoint'`` when safetensors
+                is used or ``'path_of_net_recon_model'`` otherwise.
+            device: Device the reconstruction network and input tensors are placed on.
+        """
+        self.propress = Preprocesser(device)
+        self.net_recon = networks.define_net_recon(net_recon='resnet50', use_last_fc=False, init_path='').to(device)
+
+        if sadtalker_path['use_safetensor']:
+            checkpoint = safetensors.torch.load_file(sadtalker_path['checkpoint'])
+            self.net_recon.load_state_dict(load_x_from_safetensor(checkpoint, 'face_3drecon'))
+        else:
+            checkpoint = torch.load(sadtalker_path['path_of_net_recon_model'], map_location=torch.device(device))
+            self.net_recon.load_state_dict(checkpoint['net_recon'])
+
+        self.net_recon.eval()
+        self.lm3d_std = load_lm3d(sadtalker_path['dir_of_BFM_fitting'])
+        self.device = device
+
+    def generate(self, input_path, save_dir, crop_or_resize='crop', source_image_flag=False, pic_size=256):
+        """Preprocess ``input_path`` and write its frame image, landmarks and coefficients.
+
+        Outputs are written to ``save_dir`` and named after the input file:
+        ``<name>.png``, ``<name>_landmarks.txt`` and ``<name>.mat``. Existing
+        landmark and ``.mat`` files are reused instead of being recomputed.
+
+        Args:
+            input_path: Image (``jpg``/``png``/``jpeg`` extension) or video file.
+            save_dir: Output directory.
+            crop_or_resize: Mode string. Values containing ``'crop'`` or ``'full'``
+                run the face cropper (``'ext'`` in the value turns on its ``still``
+                option); any other value uses the whole frame.
+            source_image_flag: For video input, stop after the first frame.
+            pic_size: Side length the frames are resized to.
+
+        Returns:
+            ``(coeff_path, png_path, crop_info)``, or ``(None, None, None)`` when
+            there are no frames to process.
+
+        Raises:
+            ValueError: If ``input_path`` is not an existing file.
+        """
+        pic_name = os.path.splitext(os.path.split(input_path)[-1])[0]
+
+        landmarks_path = os.path.join(save_dir, pic_name+'_landmarks.txt')
+        coeff_path = os.path.join(save_dir, pic_name+'.mat')
+        png_path = os.path.join(save_dir, pic_name+'.png')
+
+        #load input
+        if not os.path.isfile(input_path):
+            raise ValueError('input_path must be a valid path to video/image file')
+        elif input_path.split('.')[-1] in ['jpg', 'png', 'jpeg']:
+            # loader for first frame
+            full_frames = [cv2.imread(input_path)]
+        else:
+            # loader for videos
+            video_stream = cv2.VideoCapture(input_path)
+            full_frames = []
+            try:
+                while True:
+                    still_reading, frame = video_stream.read()
+                    if not still_reading:
+                        break
+                    full_frames.append(frame)
+                    if source_image_flag:
+                        break
+            finally:
+                # Also release when stopping early after the first frame.
+                video_stream.release()
+
+        x_full_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in full_frames]
+
+        #### crop images as the
+        mode = crop_or_resize.lower()
+        if 'crop' in mode or 'full' in mode:
+            # Both modes preprocess identically here; they only differ downstream.
+            x_full_frames, crop, quad = self.propress.crop(x_full_frames, still='ext' in mode, xsize=512)
+            clx, cly, crx, cry = crop
+            lx, ly, rx, ry = quad
+            lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry)
+            oy1, oy2, ox1, ox2 = cly+ly, cly+ry, clx+lx, clx+rx
+            crop_info = ((ox2 - ox1, oy2 - oy1), crop, quad)
+        else:  # resize mode
+            oy1, oy2, ox1, ox2 = 0, x_full_frames[0].shape[0], 0, x_full_frames[0].shape[1]
+            crop_info = ((ox2 - ox1, oy2 - oy1), None, None)
+
+        frames_pil = [Image.fromarray(cv2.resize(frame, (pic_size, pic_size))) for frame in x_full_frames]
+        if len(frames_pil) == 0:
+            print('No face is detected in the input file')
+            # Same arity as the success path, so callers can always unpack three values.
+            return None, None, None
+
+        # save crop info
+        # Every frame used to be written to the same path, so only the last one survived;
+        # writing just that frame leaves the same file without the repeated encodes.
+        cv2.imwrite(png_path, cv2.cvtColor(np.array(frames_pil[-1]), cv2.COLOR_RGB2BGR))
+
+        # 2. get the landmark according to the detected face.
+        if not os.path.isfile(landmarks_path):
+            lm = self.propress.predictor.extract_keypoint(frames_pil, landmarks_path)
+        else:
+            print(' Using saved landmarks.')
+            lm = np.loadtxt(landmarks_path).astype(np.float32)
+            lm = lm.reshape([len(x_full_frames), -1, 2])
+
+        if not os.path.isfile(coeff_path):
+            # load 3dmm paramter generator from Deep3DFaceRecon_pytorch
+            video_coeffs, full_coeffs = [], []
+            for idx, frame in enumerate(tqdm(frames_pil, desc='3DMM Extraction In Video:')):
+                W, H = frame.size
+                lm1 = lm[idx].reshape([-1, 2])
+
+                if np.mean(lm1) == -1:
+                    # Landmarks are all -1 (presumably "not detected" -- confirm against the
+                    # keypoint extractor): fall back to the standard landmarks scaled to the frame.
+                    lm1 = (self.lm3d_std[:, :2]+1)/2.
+                    lm1 = np.concatenate(
+                        [lm1[:, :1]*W, lm1[:, 1:2]*H], 1
+                    )
+                else:
+                    # Flip the y axis so that it runs bottom-up.
+                    lm1[:, -1] = H - 1 - lm1[:, -1]
+
+                trans_params, im1, lm1, _ = align_img(frame, lm1, self.lm3d_std)
+
+                trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32)
+                im_t = torch.tensor(np.array(im1)/255., dtype=torch.float32).permute(2, 0, 1).to(self.device).unsqueeze(0)
+
+                with torch.no_grad():
+                    full_coeff = self.net_recon(im_t)
+                    coeffs = split_coeff(full_coeff)
+
+                pred_coeff = {key: coeffs[key].cpu().numpy() for key in coeffs}
+
+                # Stored per frame: expression, angles, translation and the last three
+                # of the five alignment parameters.
+                pred_coeff = np.concatenate([
+                    pred_coeff['exp'],
+                    pred_coeff['angle'],
+                    pred_coeff['trans'],
+                    trans_params[2:][None],
+                ], 1)
+                video_coeffs.append(pred_coeff)
+                full_coeffs.append(full_coeff.cpu().numpy())
+
+            semantic_npy = np.array(video_coeffs)[:, 0]
+
+            # 'full_3dmm' keeps the full coefficient vector of the first frame only.
+            savemat(coeff_path, {'coeff_3dmm': semantic_npy, 'full_3dmm': np.array(full_coeffs)[0]})
+
+        return coeff_path, png_path, crop_info
diff --git a/scripts/utils/safetensor_helper.py b/scripts/utils/safetensor_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cdbdd21e4ed656dfe2d31a57360afb3e96480b3
--- /dev/null
+++ b/scripts/utils/safetensor_helper.py
@@ -0,0 +1,8 @@
+
+
+def load_x_from_safetensor(checkpoint, key):
+    """Extract the entries of one sub-module from a flat checkpoint mapping.
+
+    Args:
+        checkpoint: Mapping from dotted parameter names to values.
+        key: Name of the sub-module, i.e. the leading component of its keys.
+
+    Returns:
+        dict: The entries whose name starts with ``key + '.'``, with that
+        prefix removed. Empty if nothing matches.
+    """
+    prefix = key + '.'
+    # Match the leading component only and strip it once: a substring test with
+    # str.replace would also pick up names that merely contain `key` and would
+    # remove later occurrences of the prefix inside a name.
+    return {k[len(prefix):]: v for k, v in checkpoint.items() if k.startswith(prefix)}
\ No newline at end of file
diff --git a/scripts/utils/text2speech.py b/scripts/utils/text2speech.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9cd9f588b3dbfd9bc1e4c80c34ff13a74cadadc
--- /dev/null
+++ b/scripts/utils/text2speech.py
@@ -0,0 +1,20 @@
+import os
+import tempfile
+from TTS.api import TTS
+
+
+class TTSTalker():
+    """Thin wrapper that synthesizes speech to a temporary ``.wav`` file."""
+
+    def __init__(self) -> None:
+        """Load the first model listed by ``TTS().list_models()``."""
+        model_name = TTS().list_models()[0]
+        self.tts = TTS(model_name)
+
+    def test(self, text, language='en'):
+        """Synthesize ``text`` with the model's first speaker.
+
+        Args:
+            text: Text to speak.
+            language: Language code passed through to ``tts_to_file``.
+
+        Returns:
+            str: Path of the ``.wav`` file. The file is not deleted
+            automatically; removing it is up to the caller.
+        """
+        # Only a unique path is needed here. Close the handle right away so that no
+        # descriptor leaks and the file is not held open while TTS writes to it.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tempf:
+            wav_path = tempf.name
+
+        self.tts.tts_to_file(text, speaker=self.tts.speakers[0], language=language, file_path=wav_path)
+
+        return wav_path
diff --git a/scripts/utils/videoio.py b/scripts/utils/videoio.py
new file mode 100644
index 0000000000000000000000000000000000000000..08bfbdd7d4be97dc17fea4ad7b2733e9eb0ef975
--- /dev/null
+++ b/scripts/utils/videoio.py
@@ -0,0 +1,41 @@
+import shutil
+import uuid
+
+import os
+
+import cv2
+
+def load_video_to_cv2(input_path):
+    """Decode a video into a list of RGB frames.
+
+    Args:
+        input_path: Path handed to ``cv2.VideoCapture``.
+
+    Returns:
+        list: One array per frame, converted from BGR to RGB; empty if no
+        frame could be read.
+    """
+    video_stream = cv2.VideoCapture(input_path)
+    full_frames = []
+    try:
+        while True:
+            still_reading, frame = video_stream.read()
+            if not still_reading:
+                break
+            full_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+    finally:
+        # Release the capture even if a frame conversion raises.
+        video_stream.release()
+    return full_frames
+
+def save_video_with_watermark(video, audio, save_path, watermark=False):
+    """Mux ``video`` and ``audio`` with ffmpeg and optionally overlay the logo.
+
+    The two inputs are first combined (video stream copied) into a uniquely
+    named temporary ``.mp4`` in the current working directory. Without a
+    watermark that file is moved to ``save_path``; otherwise ffmpeg overlays
+    the SadTalker logo in the top-right corner and writes ``save_path``.
+
+    Args:
+        video: Path of the input video.
+        audio: Path of the input audio.
+        save_path: Destination path.
+        watermark: ``False`` skips the overlay; any other value enables it.
+
+    Note:
+        ffmpeg's exit status is not checked; if it produced no output, the
+        following move/remove raises ``FileNotFoundError``.
+    """
+    # Imported locally so this block does not depend on a change to the module imports.
+    import subprocess
+
+    ffmpeg = ['ffmpeg', '-y', '-hide_banner', '-loglevel', 'error']
+    temp_file = str(uuid.uuid4())+'.mp4'
+    # Argument lists bypass the shell, so quotes or metacharacters in a path can
+    # neither break the command nor inject one.
+    subprocess.run(ffmpeg + ['-i', video, '-i', audio, '-vcodec', 'copy', temp_file])
+
+    if watermark is False:
+        shutil.move(temp_file, save_path)
+    else:
+        # watermark
+        try:
+            ##### check if stable-diffusion-webui
+            import webui
+            from modules import paths
+            watarmark_path = paths.script_path+"/extensions/SadTalker/docs/sadtalker_logo.png"
+        except Exception:
+            # Not running inside the webui (or it is unusable): locate the logo
+            # relative to this file instead.
+            # get the root path of sadtalker.
+            dir_path = os.path.dirname(os.path.realpath(__file__))
+            watarmark_path = dir_path+"/../../docs/sadtalker_logo.png"
+
+        subprocess.run(ffmpeg + [
+            '-i', temp_file, '-i', watarmark_path,
+            '-filter_complex', '[1]scale=100:-1[wm];[0][wm]overlay=(main_w-overlay_w)-10:10',
+            save_path,
+        ])
+        os.remove(temp_file)
\ No newline at end of file
diff --git a/scripts/weights/facelib/detection_Resnet50_Final.pth b/scripts/weights/facelib/detection_Resnet50_Final.pth
new file mode 100644
index 0000000000000000000000000000000000000000..16546738ce0a00a9fd47585e0fc52744d31cc117
--- /dev/null
+++ b/scripts/weights/facelib/detection_Resnet50_Final.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
+size 109497761
diff --git a/scripts/weights/facelib/parsing_parsenet.pth b/scripts/weights/facelib/parsing_parsenet.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1ac2efc50360a79c9905dbac57d9d99cbfbe863c
--- /dev/null
+++ b/scripts/weights/facelib/parsing_parsenet.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
+size 85331193
diff --git a/subprocess/LivePortrait/.gitignore b/subprocess/LivePortrait/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1f85f19b8dc076778ff4cf5dcf758628ccf2df8f
--- /dev/null
+++ b/subprocess/LivePortrait/.gitignore
@@ -0,0 +1,21 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+**/__pycache__/
+*.py[cod]
+**/*.py[cod]
+*$py.class
+
+# Model weights
+**/*.pth
+**/*.onnx
+
+pretrained_weights/*.md
+pretrained_weights/docs
+
+# Ipython notebook
+*.ipynb
+
+# Temporary files or benchmark resources
+animations/*
+tmp/*
+.vscode/launch.json
diff --git a/subprocess/LivePortrait/.vscode/settings.json b/subprocess/LivePortrait/.vscode/settings.json
new file mode 100644
index 0000000000000000000000000000000000000000..1bca84ccf9fed7936fc93d2704ff4eab6c734728
--- /dev/null
+++ b/subprocess/LivePortrait/.vscode/settings.json
@@ -0,0 +1,19 @@
+{
+ "[python]": {
+ "editor.tabSize": 4
+ },
+ "files.eol": "\n",
+ "files.insertFinalNewline": true,
+ "files.trimFinalNewlines": true,
+ "files.trimTrailingWhitespace": true,
+ "files.exclude": {
+ "**/.git": true,
+ "**/.svn": true,
+ "**/.hg": true,
+ "**/CVS": true,
+ "**/.DS_Store": true,
+ "**/Thumbs.db": true,
+ "**/*.crswap": true,
+ "**/__pycache__": true
+ }
+}
diff --git a/subprocess/LivePortrait/LICENSE b/subprocess/LivePortrait/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..9e8f5026e9273b98745188ec4bbc8ac05b2b22ef
--- /dev/null
+++ b/subprocess/LivePortrait/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Kuaishou Visual Generation and Interaction Center
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14.mp4 b/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..b75d37fdea47db26f14f69f85b8994e114f2cc9c
--- /dev/null
+++ b/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f25d31d83ae06f8dc313ae6ef9016269336219f25ffa8765d60920d90f314b7d
+size 6107008
diff --git a/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14_concat.mp4 b/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..1cfc6a6641f837c1ac035aa47a9d46b99919225a
--- /dev/null
+++ b/subprocess/LivePortrait/animations/IMG_20240410_151931_Bokeh~2--d14_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30dde4ba24793ed58398f9ee4d54a5a7d40bee7ed2634f63603a8670cdb66b97
+size 2933593
diff --git a/subprocess/LivePortrait/animations/image3--d6.mp4 b/subprocess/LivePortrait/animations/image3--d6.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..7b84139eaae2f30e1cdcadd5dde9b2c35832ee1b
Binary files /dev/null and b/subprocess/LivePortrait/animations/image3--d6.mp4 differ
diff --git a/subprocess/LivePortrait/animations/image3--d6_concat.mp4 b/subprocess/LivePortrait/animations/image3--d6_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..0b322e7d61b6e968674f38f24ac1382695175fb6
--- /dev/null
+++ b/subprocess/LivePortrait/animations/image3--d6_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be7f3b6e664d27974e38fb168e6aef4379e91d50da6e55023f009ce6c00e9e54
+size 5005845
diff --git a/subprocess/LivePortrait/animations/image3--intro.mp4 b/subprocess/LivePortrait/animations/image3--intro.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..6e93bae84ae82eadf739f6f5d3b4514a26344ab4
Binary files /dev/null and b/subprocess/LivePortrait/animations/image3--intro.mp4 differ
diff --git a/subprocess/LivePortrait/animations/image3--intro_concat.mp4 b/subprocess/LivePortrait/animations/image3--intro_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..4f562366a6893575066de2ee96595752967a54ec
--- /dev/null
+++ b/subprocess/LivePortrait/animations/image3--intro_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:079eac3a6cf84b04e18723a066448fddffcd71988b1f609aedd14578797384d9
+size 1373427
diff --git a/subprocess/LivePortrait/animations/input--d10.mp4 b/subprocess/LivePortrait/animations/input--d10.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..da3c6bdca05a2ae1e3acfd585dd499c8a7147576
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d10.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18c5ebdbfbd40d4a05045acacd6af673962a411672b137bba48f482210d491e6
+size 2421900
diff --git a/subprocess/LivePortrait/animations/input--d10_concat.mp4 b/subprocess/LivePortrait/animations/input--d10_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..00ffb3bc6d4903d36eced467d7821b5aa0b17666
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d10_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c7b8a86d3bcb23090991b567683093097f1c7634e478b3953f972136e6124bb
+size 3403717
diff --git a/subprocess/LivePortrait/animations/input--d13.mp4 b/subprocess/LivePortrait/animations/input--d13.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..59b219cd07692d3a39ae4d4a7051e7914f4d6ae6
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d13.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c7b7df54d0aeb8a75eaa5ddec371ffcd51308e5a42d6a273c0b90f40637e828
+size 1424807
diff --git a/subprocess/LivePortrait/animations/input--d13_concat.mp4 b/subprocess/LivePortrait/animations/input--d13_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..6c130bc85a2f03fe6dea60126ed013420ef5ac07
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d13_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d071a3d6c188a9f36d45d82cebd2cd761c7f6702e9c5e1c887e238261169efd
+size 2013707
diff --git a/subprocess/LivePortrait/animations/input--d19.mp4 b/subprocess/LivePortrait/animations/input--d19.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..9ec7fafa20814458f9ba9d2d89babb78c160f2d1
Binary files /dev/null and b/subprocess/LivePortrait/animations/input--d19.mp4 differ
diff --git a/subprocess/LivePortrait/animations/input--d19_concat.mp4 b/subprocess/LivePortrait/animations/input--d19_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..094cbc030140592b0d145dd411264a5c42d8c5fa
Binary files /dev/null and b/subprocess/LivePortrait/animations/input--d19_concat.mp4 differ
diff --git a/subprocess/LivePortrait/animations/input--d3.mp4 b/subprocess/LivePortrait/animations/input--d3.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..59ac6adc061ca88b7bb7572542ff141a4cda23d6
Binary files /dev/null and b/subprocess/LivePortrait/animations/input--d3.mp4 differ
diff --git a/subprocess/LivePortrait/animations/input--d3_concat.mp4 b/subprocess/LivePortrait/animations/input--d3_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..56cdedec1e5798ae6c7f200d262435b7bcfad39e
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d3_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00212de5f9bc729ce91060887668022f48a9507f147b953c53b963680324aa53
+size 1632067
diff --git a/subprocess/LivePortrait/animations/input--d9.mp4 b/subprocess/LivePortrait/animations/input--d9.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..7fc5db9f6c9685ffade8320bfc38e3737db0ddb2
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d9.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22dc451add7cf70badaae8517774f881a5d900a0909fee15605771b499cf413
+size 3321644
diff --git a/subprocess/LivePortrait/animations/input--d9_concat.mp4 b/subprocess/LivePortrait/animations/input--d9_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..644f0b9675c0ef5d1bcf7be37dc197f460d9466b
--- /dev/null
+++ b/subprocess/LivePortrait/animations/input--d9_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a3a13fb284d8805076a6adfff2684728f8b7efe6c3d6591259dd2a0a528ee3
+size 4673474
diff --git a/subprocess/LivePortrait/animations/s1--d13.mp4 b/subprocess/LivePortrait/animations/s1--d13.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..cf1cd0f4719bc216276850e6b0ed84ca13b4aed8
--- /dev/null
+++ b/subprocess/LivePortrait/animations/s1--d13.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2c741ffc7e32a705ee22f17d831f397d2890eb7bb0e7c126b5b514ec2f015a
+size 1546609
diff --git a/subprocess/LivePortrait/animations/s1--d13_concat.mp4 b/subprocess/LivePortrait/animations/s1--d13_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..e2a2065beeaa38755c96d137da4ae08342db1418
--- /dev/null
+++ b/subprocess/LivePortrait/animations/s1--d13_concat.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:943508c9079ac81bda287f468c22dd085872c770a84fafc8fee65ba5636b1932
+size 1723831
diff --git a/subprocess/LivePortrait/animations/s10--d0.mp4 b/subprocess/LivePortrait/animations/s10--d0.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..8cd2425b82630e54cd28974dff53fe47f8587402
Binary files /dev/null and b/subprocess/LivePortrait/animations/s10--d0.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s10--d0_concat.mp4 b/subprocess/LivePortrait/animations/s10--d0_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..23e648a14be52cf68ecd2117b489a82c0be2b01b
Binary files /dev/null and b/subprocess/LivePortrait/animations/s10--d0_concat.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s5--d0.mp4 b/subprocess/LivePortrait/animations/s5--d0.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..c68d4fc12a4ca4597875431d902db14b8691f020
Binary files /dev/null and b/subprocess/LivePortrait/animations/s5--d0.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s5--d0_concat.mp4 b/subprocess/LivePortrait/animations/s5--d0_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..b00fe6f4721169eff9b0358a3b3be9297e1e4963
Binary files /dev/null and b/subprocess/LivePortrait/animations/s5--d0_concat.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s6--d0.mp4 b/subprocess/LivePortrait/animations/s6--d0.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..dbbfbdad86952b35c3ee19584add8ecd2244025d
Binary files /dev/null and b/subprocess/LivePortrait/animations/s6--d0.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s6--d0_concat.mp4 b/subprocess/LivePortrait/animations/s6--d0_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..7f6e876d5cd430d9789810fa98a0525b2214da74
Binary files /dev/null and b/subprocess/LivePortrait/animations/s6--d0_concat.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s6--d18.mp4 b/subprocess/LivePortrait/animations/s6--d18.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..c1f66e195e6c2297fa15b7eaa993ac0c58f8bc7e
Binary files /dev/null and b/subprocess/LivePortrait/animations/s6--d18.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s6--d18_concat.mp4 b/subprocess/LivePortrait/animations/s6--d18_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..89eefd235afa7314bdf4ecfe250d89a01f404887
Binary files /dev/null and b/subprocess/LivePortrait/animations/s6--d18_concat.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s9--d0.mp4 b/subprocess/LivePortrait/animations/s9--d0.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..04d54d9300084786fa73459a38db5511fe256dbc
Binary files /dev/null and b/subprocess/LivePortrait/animations/s9--d0.mp4 differ
diff --git a/subprocess/LivePortrait/animations/s9--d0_concat.mp4 b/subprocess/LivePortrait/animations/s9--d0_concat.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..c4bb81b068b5eb553814b5cf52a6e37636f9e870
Binary files /dev/null and b/subprocess/LivePortrait/animations/s9--d0_concat.mp4 differ
diff --git a/subprocess/LivePortrait/app.py b/subprocess/LivePortrait/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e91e6a7bd7ddd428e7a3fe3551c8085d3a1dd38
--- /dev/null
+++ b/subprocess/LivePortrait/app.py
@@ -0,0 +1,205 @@
+# coding: utf-8
+
+"""
+The entrance of the gradio
+"""
+import os
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+
+import tyro
+import gradio as gr
+import os.path as osp
+from src.utils.helper import load_description
+from src.gradio_pipeline import GradioPipeline
+from src.config.crop_config import CropConfig
+from src.config.argument_config import ArgumentConfig
+from src.config.inference_config import InferenceConfig
+
+
+def partial_fields(target_class, kwargs):  # Instantiate target_class from the subset of kwargs whose keys it exposes as attributes.
+ return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})  # hasattr is checked on the class, not an instance; NOTE(review): fields without a class-level default would presumably be dropped - confirm
+
+
+# set tyro theme
+tyro.extras.set_accent_color("bright_cyan")
+print(ArgumentConfig)  # NOTE(review): prints the config class on every start; looks like leftover debug output - confirm before removing
+args = tyro.cli(ArgumentConfig)  # parse command-line arguments with tyro, using ArgumentConfig as the schema
+
+# specify configs for inference
+inference_cfg = partial_fields(InferenceConfig, args.__dict__) # initialize InferenceConfig from the matching attributes of args
+crop_cfg = partial_fields(CropConfig, args.__dict__) # initialize CropConfig from the matching attributes of args
+
+gradio_pipeline = GradioPipeline(  # single module-level pipeline instance used by both callbacks defined below
+ inference_cfg=inference_cfg,
+ crop_cfg=crop_cfg,
+ args=args
+)
+
+
+def gpu_wrapped_execute_video(*args, **kwargs):  # Pass-through callback for the animation path; NOTE(review): despite the name, no GPU decorator is applied in this file
+ return gradio_pipeline.execute_video(*args, **kwargs)  # forwards every positional/keyword argument unchanged and returns the pipeline's result
+
+
+def gpu_wrapped_execute_image(*args, **kwargs):  # Pass-through callback for the retargeting path; NOTE(review): despite the name, no GPU decorator is applied in this file
+ return gradio_pipeline.execute_image(*args, **kwargs)  # forwards every positional/keyword argument unchanged and returns the pipeline's result
+
+
+# assets
+title_md = "assets/gradio_title.md"  # relative path; passed to load_description when the page is built
+example_portrait_dir = "assets/examples/source"
+example_video_dir = "assets/examples/driving"
+data_examples = [  # each row: source image, driving video, then four booleans in the order of the gr.Examples inputs list (relative motion, crop source, paste-back, crop driving video)
+ [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
+ [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
+ [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
+ [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False],
+ [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False],
+ [osp.join(example_portrait_dir, "s2.jpg"), osp.join(example_video_dir, "d13.mp4"), True, True, True, True],  # the only row that enables driving-video cropping
+]
+#################### interface logic ####################
+
+# Define components first (created outside the Blocks context and placed later with .render(), so they can be referenced before their position in the layout)
+eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
+lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
+retargeting_input_image = gr.Image(type="filepath")  # handed to the callback as a file path
+output_image = gr.Image(type="numpy")  # retargeting result
+output_image_paste_back = gr.Image(type="numpy")  # retargeting result shown under "Paste-back Result"
+output_video = gr.Video()  # shown under "The animated video in the original image space"
+output_video_concat = gr.Video()  # shown under "The animated video"
+
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ gr.HTML(load_description(title_md))
+ gr.Markdown(load_description("assets/gradio_description_upload.md"))
+ with gr.Row():
+ with gr.Accordion(open=True, label="Source Portrait"):
+ image_input = gr.Image(type="filepath")
+ gr.Examples(
+ examples=[
+ [osp.join(example_portrait_dir, "s9.jpg")],
+ [osp.join(example_portrait_dir, "s6.jpg")],
+ [osp.join(example_portrait_dir, "s10.jpg")],
+ [osp.join(example_portrait_dir, "s5.jpg")],
+ [osp.join(example_portrait_dir, "s7.jpg")],
+ [osp.join(example_portrait_dir, "s12.jpg")],
+ ],
+ inputs=[image_input],
+ cache_examples=False,
+ )
+ with gr.Accordion(open=True, label="Driving Video"):
+ video_input = gr.Video()
+ gr.Examples(
+ examples=[
+ [osp.join(example_video_dir, "d0.mp4")],
+ [osp.join(example_video_dir, "d18.mp4")],
+ [osp.join(example_video_dir, "d19.mp4")],
+ [osp.join(example_video_dir, "d14.mp4")],
+ [osp.join(example_video_dir, "d6.mp4")],
+ ],
+ inputs=[video_input],
+ cache_examples=False,
+ )
+ with gr.Row():
+ with gr.Accordion(open=False, label="Animation Instructions and Options"):
+ gr.Markdown(load_description("assets/gradio_description_animation.md"))
+ with gr.Row():
+ flag_relative_input = gr.Checkbox(value=True, label="relative motion")
+ flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
+ flag_remap_input = gr.Checkbox(value=True, label="paste-back")
+ flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving video)")
+ with gr.Row():
+ with gr.Column():
+ process_button_animation = gr.Button("🚀 Animate", variant="primary")
+ with gr.Column():
+ process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
+ with gr.Row():
+ with gr.Column():
+ with gr.Accordion(open=True, label="The animated video in the original image space"):
+ output_video.render()
+ with gr.Column():
+ with gr.Accordion(open=True, label="The animated video"):
+ output_video_concat.render()
+ with gr.Row():
+ # Examples
+ gr.Markdown("## You could also choose the examples below by one click ⬇️")
+ with gr.Row():
+ gr.Examples(  # one-click examples that fill the source image, driving video and the four option checkboxes together
+ examples=data_examples,
+ fn=gpu_wrapped_execute_video,
+ inputs=[
+ image_input,
+ video_input,
+ flag_relative_input,
+ flag_do_crop_input,
+ flag_remap_input,
+ flag_crop_driving_video_input
+ ],
+ outputs=[output_image, output_image_paste_back],  # NOTE(review): fn is the video callback but these are the image components; the Animate button uses [output_video, output_video_concat] - confirm whether this is intended
+ examples_per_page=len(data_examples),  # show every example row on a single page
+ cache_examples=False,  # NOTE(review): presumably fn is never invoked from here with caching off, which would hide the mismatch above - verify against the Gradio docs
+ )
+ gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
+ with gr.Row(visible=True):
+ eye_retargeting_slider.render()
+ lip_retargeting_slider.render()
+ with gr.Row(visible=True):
+ process_button_retargeting = gr.Button("🚗 Retargeting", variant="primary")
+ process_button_reset_retargeting = gr.ClearButton(
+ [
+ eye_retargeting_slider,
+ lip_retargeting_slider,
+ retargeting_input_image,
+ output_image,
+ output_image_paste_back
+ ],
+ value="🧹 Clear"
+ )
+ with gr.Row(visible=True):
+ with gr.Column():
+ with gr.Accordion(open=True, label="Retargeting Input"):
+ retargeting_input_image.render()
+ gr.Examples(
+ examples=[
+ [osp.join(example_portrait_dir, "s9.jpg")],
+ [osp.join(example_portrait_dir, "s6.jpg")],
+ [osp.join(example_portrait_dir, "s10.jpg")],
+ [osp.join(example_portrait_dir, "s5.jpg")],
+ [osp.join(example_portrait_dir, "s7.jpg")],
+ [osp.join(example_portrait_dir, "s12.jpg")],
+ ],
+ inputs=[retargeting_input_image],
+ cache_examples=False,
+ )
+ with gr.Column():
+ with gr.Accordion(open=True, label="Retargeting Result"):
+ output_image.render()
+ with gr.Column():
+ with gr.Accordion(open=True, label="Paste-back Result"):
+ output_image_paste_back.render()
+ # binding functions for buttons
+ process_button_retargeting.click(  # "Retargeting" button -> image callback
+ # fn=gradio_pipeline.execute_image,
+ fn=gpu_wrapped_execute_image,
+ inputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image, flag_do_crop_input],  # reuses the "do crop (source)" checkbox from the animation options
+ outputs=[output_image, output_image_paste_back],
+ show_progress=True
+ )
+ process_button_animation.click(  # "Animate" button -> video callback
+ fn=gpu_wrapped_execute_video,
+ inputs=[
+ image_input,
+ video_input,
+ flag_relative_input,
+ flag_do_crop_input,
+ flag_remap_input,
+ flag_crop_driving_video_input
+ ],
+ outputs=[output_video, output_video_concat],
+ show_progress=True
+ )
+
+
+demo.launch(  # start the Gradio server at import time (this file has no __main__ guard); all three settings come from the parsed CLI args
+ server_port=args.server_port,
+ share=args.share,
+ server_name=args.server_name
+)
diff --git a/subprocess/LivePortrait/assets/.gitignore b/subprocess/LivePortrait/assets/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..892dfa4274b60c1629e26719bbd1e462fcce33e8
--- /dev/null
+++ b/subprocess/LivePortrait/assets/.gitignore
@@ -0,0 +1,2 @@
+examples/driving/*.pkl
+examples/driving/*_crop.mp4
diff --git a/subprocess/LivePortrait/assets/docs/changelog/2024-07-10.md b/subprocess/LivePortrait/assets/docs/changelog/2024-07-10.md
new file mode 100644
index 0000000000000000000000000000000000000000..8b7fa8880808ee37b43a86851a557570deae12f3
--- /dev/null
+++ b/subprocess/LivePortrait/assets/docs/changelog/2024-07-10.md
@@ -0,0 +1,17 @@
+## 2024/07/10
+
+**First, thank you all for your attention, support, sharing, and contributions to LivePortrait!** ❤️
+The popularity of LivePortrait has exceeded our expectations. If you encounter any issues or other problems and we do not respond promptly, please accept our apologies. We are still actively updating and improving this repository.
+
+### Updates
+
+- Audio and video concatenating: If the driving video contains audio, it will automatically be included in the generated video. Additionally, the generated video will maintain the same FPS as the driving video. If you run LivePortrait on Windows, you may need to install `ffmpeg` exe, see issue [#94](https://github.com/KwaiVGI/LivePortrait/issues/94).
+
+- Driving video auto-cropping: Implemented automatic cropping for driving videos by tracking facial landmarks and calculating a global cropping box with a 1:1 aspect ratio. Alternatively, you can crop using video editing software or other tools to achieve a 1:1 ratio. Auto-cropping is not enabled by default; you can enable it with `--flag_crop_driving_video`.
+
+- Template making: Added the ability to create templates to protect privacy. The template is a `.pkl` file that only contains the motions of the driving video. Theoretically, it is impossible to reconstruct the original face from the template. These templates can be used to generate videos without needing the original driving video. By default, the template will be generated and saved as a .pkl file with the same name as the driving video. Once generated, you can specify it using the `-d` or `--driving_info` option.
+
+
+### Others
+
+- If you encounter a black box problem, disable half-precision inference by using `--no_flag_use_half_precision`, reported by issue [#40](https://github.com/KwaiVGI/LivePortrait/issues/40), [#48](https://github.com/KwaiVGI/LivePortrait/issues/48), [#62](https://github.com/KwaiVGI/LivePortrait/issues/62).
diff --git a/subprocess/LivePortrait/assets/docs/inference.gif b/subprocess/LivePortrait/assets/docs/inference.gif
new file mode 100644
index 0000000000000000000000000000000000000000..7e18022e5245dcb6449df6d190b538d5ca024e06
Binary files /dev/null and b/subprocess/LivePortrait/assets/docs/inference.gif differ
diff --git a/subprocess/LivePortrait/assets/docs/showcase.gif b/subprocess/LivePortrait/assets/docs/showcase.gif
new file mode 100644
index 0000000000000000000000000000000000000000..fae84c2d3550a37446e482286b70902b21e2e232
--- /dev/null
+++ b/subprocess/LivePortrait/assets/docs/showcase.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bca5f38bfd555bf7c013312d87883afdf39d97fba719ac171c60f897af49e21
+size 6623248
diff --git a/subprocess/LivePortrait/assets/docs/showcase2.gif b/subprocess/LivePortrait/assets/docs/showcase2.gif
new file mode 100644
index 0000000000000000000000000000000000000000..29175c0eeb85b9db0ffd61e3e9281dffe3536352
--- /dev/null
+++ b/subprocess/LivePortrait/assets/docs/showcase2.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb1fffb139681775780b2956e7d0289f55d199c1a3e14ab263887864d4b0d586
+size 2881351
diff --git a/subprocess/LivePortrait/assets/examples/driving/d0.mp4 b/subprocess/LivePortrait/assets/examples/driving/d0.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..92391dd3ff235fc82f29b7cc77fe4a7ce183d934
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d0.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63f6f9962e1fdf6e6722172e7a18155204858d5d5ce3b1e0646c150360c33bed
+size 2958395
diff --git a/subprocess/LivePortrait/assets/examples/driving/d0.pkl b/subprocess/LivePortrait/assets/examples/driving/d0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..91be76dd7e9ba4f0322f530358e599393f619705
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56217aa88f7b03483f6a04294fe7813e0d8de624e041284d72ce1436766cd860
+size 41087
diff --git a/subprocess/LivePortrait/assets/examples/driving/d1.pkl b/subprocess/LivePortrait/assets/examples/driving/d1.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8e11db176d93c34f7b44aa94487ac0a6715168cb
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d1.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16b47d68396e4a5fc0756b4c83827e8fc27c08bc92be1aa04809f741d9db95f9
+size 8599
diff --git a/subprocess/LivePortrait/assets/examples/driving/d10.mp4 b/subprocess/LivePortrait/assets/examples/driving/d10.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..9123c788df48b50227c6aeb9a1d4061510424223
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d10.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9074d557e127f4a506bd0def25e2b3182676e917d53982852d33c807a95ef1fb
+size 1146372
diff --git a/subprocess/LivePortrait/assets/examples/driving/d11.mp4 b/subprocess/LivePortrait/assets/examples/driving/d11.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..378d00065aaa1d30d6b14be10e0e78188deba152
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/driving/d11.mp4 differ
diff --git a/subprocess/LivePortrait/assets/examples/driving/d12.mp4 b/subprocess/LivePortrait/assets/examples/driving/d12.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..984922e5c722fa9672dc6c6765bf1183466daf5b
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/driving/d12.mp4 differ
diff --git a/subprocess/LivePortrait/assets/examples/driving/d13.mp4 b/subprocess/LivePortrait/assets/examples/driving/d13.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..23b6af6e4afa879a11ec8284bfdb3253739e6b41
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d13.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d03e39c28323cde1c5fc6c5629aa83fe6c834fa7c9ed2dac969e1247eaafdb60
+size 2475854
diff --git a/subprocess/LivePortrait/assets/examples/driving/d14.mp4 b/subprocess/LivePortrait/assets/examples/driving/d14.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..e4a25d614cae7ae9b0425539da1c24d09d06c7db
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/driving/d14.mp4 differ
diff --git a/subprocess/LivePortrait/assets/examples/driving/d18.mp4 b/subprocess/LivePortrait/assets/examples/driving/d18.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..c23ade1841fa5744af3ffdc3a42d52b9227a9d2e
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/driving/d18.mp4 differ
diff --git a/subprocess/LivePortrait/assets/examples/driving/d19.mp4 b/subprocess/LivePortrait/assets/examples/driving/d19.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..07562e983f601d00bcc0e388fe60872c046bcbaf
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/driving/d19.mp4 differ
diff --git a/subprocess/LivePortrait/assets/examples/driving/d2.pkl b/subprocess/LivePortrait/assets/examples/driving/d2.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..6bc7d490b84d9a06436b133e4c1457b6055f8dc1
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d2.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:021a0e83d4ae81ab75b49b31a6cf75ac7987c86e02808aced3dd49894512a082
+size 8599
diff --git a/subprocess/LivePortrait/assets/examples/driving/d3.mp4 b/subprocess/LivePortrait/assets/examples/driving/d3.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..8b70b6aa3c0e566a4fa3e5959f2d3b916e99b708
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d3.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5c86e49b1b43dcb1449b499eb5a7f0cbae2f78aec08b5598193be1e4257099
+size 1430968
diff --git a/subprocess/LivePortrait/assets/examples/driving/d5.pkl b/subprocess/LivePortrait/assets/examples/driving/d5.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4fde2987728e8e4491600c68c951189c095ef90e
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d5.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91f2863838a089fe418b22864e7c48ac1f2b9d4513afb033a9d9dd5979a90b8c
+size 77776
diff --git a/subprocess/LivePortrait/assets/examples/driving/d6.mp4 b/subprocess/LivePortrait/assets/examples/driving/d6.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..44f351385cef843b21b03fab8c3b10e0c005ec5e
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d6.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00e3ea79bbf28cbdc4fbb67ec655d9a0fe876e880ec45af55ae481348d0c0fff
+size 1967790
diff --git a/subprocess/LivePortrait/assets/examples/driving/d7.pkl b/subprocess/LivePortrait/assets/examples/driving/d7.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..be7c9d74d12c19a8460b215da117de737889b5b2
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d7.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84aed70f3dd01ebd818c51fc11762eeff51efeef05b1f15a660b105c4c0748da
+size 93496
diff --git a/subprocess/LivePortrait/assets/examples/driving/d8.pkl b/subprocess/LivePortrait/assets/examples/driving/d8.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..dc631a4fcbd858afec881f66fd13b19f820d2eab
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d8.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:099afc34d40794aa733644af76dcd1bc387573c381a82340018df7019a06d68e
+size 144334
diff --git a/subprocess/LivePortrait/assets/examples/driving/d9.mp4 b/subprocess/LivePortrait/assets/examples/driving/d9.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..7803b3bf5c460a79d94e5cfbedb0de1f52d449d2
--- /dev/null
+++ b/subprocess/LivePortrait/assets/examples/driving/d9.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a414aa1d547be35306d692065a2157434bf40a6025ba8e30ce12e5bb322cc33
+size 2257929
diff --git a/subprocess/LivePortrait/assets/examples/source/s0.jpg b/subprocess/LivePortrait/assets/examples/source/s0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ef44c593be38cea30422fff9ed986a8a77889348
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s0.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s1.jpg b/subprocess/LivePortrait/assets/examples/source/s1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ebacda3519a1452aee239f7e104d2c6ff40beb25
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s1.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s10.jpg b/subprocess/LivePortrait/assets/examples/source/s10.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ee9616b592f070fbe90a8717da01477e8d4ee01f
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s10.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s11.jpg b/subprocess/LivePortrait/assets/examples/source/s11.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bd2fa2d2867336215012943addd7c7def2a29ccb
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s11.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s12.jpg b/subprocess/LivePortrait/assets/examples/source/s12.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d3d65c1e8e099ec279d730d296875b937f885417
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s12.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s2.jpg b/subprocess/LivePortrait/assets/examples/source/s2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e851bd20b65c552266a87bb87a9b509e3ea56f7d
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s2.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s3.jpg b/subprocess/LivePortrait/assets/examples/source/s3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9f3ba2a358e5b88450e7466761dff3e983e18e16
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s3.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s4.jpg b/subprocess/LivePortrait/assets/examples/source/s4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..17f611bf942ad168d4e4d03b7e5c42d6650c4be1
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s4.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s5.jpg b/subprocess/LivePortrait/assets/examples/source/s5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9abad7ef061b93579a373cf141d38710d9b1e32d
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s5.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s6.jpg b/subprocess/LivePortrait/assets/examples/source/s6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..91c13d5f2b48d143ca596566ad10f0a0e5693da4
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s6.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s7.jpg b/subprocess/LivePortrait/assets/examples/source/s7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cf96f2d5651f7ae0faf08193ecd3df282c5c3b53
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s7.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s8.jpg b/subprocess/LivePortrait/assets/examples/source/s8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b415ed1d4a4e5cf01e6dc30d6b4ced20814558d5
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s8.jpg differ
diff --git a/subprocess/LivePortrait/assets/examples/source/s9.jpg b/subprocess/LivePortrait/assets/examples/source/s9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3ef7251ba10bf83356587016b126a52bdbca7b18
Binary files /dev/null and b/subprocess/LivePortrait/assets/examples/source/s9.jpg differ
diff --git a/subprocess/LivePortrait/assets/gradio_description_animation.md b/subprocess/LivePortrait/assets/gradio_description_animation.md
new file mode 100644
index 0000000000000000000000000000000000000000..cad1ad62bb41113c0d2e75a93581748ff65d384f
--- /dev/null
+++ b/subprocess/LivePortrait/assets/gradio_description_animation.md
@@ -0,0 +1,16 @@
+🔥 To animate the source portrait with the driving video, please follow these steps:
+
+1. In the Animation Options section, we recommend enabling the do crop (source) option if faces occupy a small portion of your image.
+
+
+2. Press the 🚀 Animate button and wait for a moment. Your animated video will appear in the result block. This may take a few moments.
+
+
+3. If you want to upload your own driving video, the best practice:
+
+ - Crop it to a 1:1 aspect ratio (e.g., 512x512 or 256x256 pixels), or enable auto-cropping by checking `do crop (driving video)`.
+ - Focus on the head area, similar to the example videos.
+ - Minimize shoulder movement.
+ - Make sure the first frame of driving video is a frontal face with **neutral expression**.
+
+
diff --git a/subprocess/LivePortrait/assets/gradio_description_retargeting.md b/subprocess/LivePortrait/assets/gradio_description_retargeting.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ff1a80d0a025b765f111a8c45a25cd20d5753d9
--- /dev/null
+++ b/subprocess/LivePortrait/assets/gradio_description_retargeting.md
@@ -0,0 +1,4 @@
+
+
+## Retargeting
+🔥 To edit the eyes and lip open ratio of the source portrait, drag the sliders and click the 🚗 Retargeting button. You can try running it multiple times. 😊 Set both ratios to 0.8 to see what's going on!
diff --git a/subprocess/LivePortrait/assets/gradio_description_upload.md b/subprocess/LivePortrait/assets/gradio_description_upload.md
new file mode 100644
index 0000000000000000000000000000000000000000..035a6c2332bbd7485a367612e20818cc26dad857
--- /dev/null
+++ b/subprocess/LivePortrait/assets/gradio_description_upload.md
@@ -0,0 +1,2 @@
+## 🤗 This is the official gradio demo for **LivePortrait**.
+Please upload or use a webcam to get a Source Portrait (any aspect ratio) and upload a Driving Video (1:1 aspect ratio, or any aspect ratio with do crop (driving video) checked).
diff --git a/subprocess/LivePortrait/assets/gradio_title.md b/subprocess/LivePortrait/assets/gradio_title.md
new file mode 100644
index 0000000000000000000000000000000000000000..c9bbfc2e89419eaafabfe636e4d3230eb0b5e7b0
--- /dev/null
+++ b/subprocess/LivePortrait/assets/gradio_title.md
@@ -0,0 +1,11 @@
+
+
+
LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control
+
+
+
diff --git a/subprocess/LivePortrait/inference.py b/subprocess/LivePortrait/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..8387e7f01657b64430f50603b45557d8ace3304f
--- /dev/null
+++ b/subprocess/LivePortrait/inference.py
@@ -0,0 +1,33 @@
+# coding: utf-8
+
+import tyro
+from src.config.argument_config import ArgumentConfig
+from src.config.inference_config import InferenceConfig
+from src.config.crop_config import CropConfig
+from src.live_portrait_pipeline import LivePortraitPipeline
+
+
+def partial_fields(target_class, kwargs):  # Instantiate target_class from the subset of kwargs whose keys it exposes as attributes.
+ return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})  # hasattr is checked on the class, not an instance; NOTE(review): fields without a class-level default would presumably be dropped - confirm
+
+
+def main():  # Command-line entry point: parse arguments, build the two configs, construct the pipeline and run it once.
+ # set tyro theme
+ tyro.extras.set_accent_color("bright_cyan")
+ args = tyro.cli(ArgumentConfig)  # parse command-line arguments with tyro, using ArgumentConfig as the schema
+
+ # specify configs for inference
+ inference_cfg = partial_fields(InferenceConfig, args.__dict__) # initialize InferenceConfig from the matching attributes of args
+ crop_cfg = partial_fields(CropConfig, args.__dict__) # initialize CropConfig from the matching attributes of args
+
+ live_portrait_pipeline = LivePortraitPipeline(
+ inference_cfg=inference_cfg,
+ crop_cfg=crop_cfg
+ )
+
+ # run
+ live_portrait_pipeline.execute(args)  # the full args object is passed through; main() itself returns None
+
+
+if __name__ == '__main__':  # run main() only when this file is executed as a script, not when it is imported
+ main()
diff --git a/subprocess/LivePortrait/pretrained_weights/.gitattributes b/subprocess/LivePortrait/pretrained_weights/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..1240e442c9107184a69cb6f4a3961ba16cbb8a1b
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/.gitattributes
@@ -0,0 +1,45 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+liveportrait/retargeting_models/stitching_retargeting_module.pth filter=lfs diff=lfs merge=lfs -text
+liveportrait/base_models/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
+liveportrait/base_models/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
+liveportrait/base_models/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
+liveportrait/base_models/warping_module.pth filter=lfs diff=lfs merge=lfs -text
+insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
+insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
+liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
+docs/inference.gif filter=lfs diff=lfs merge=lfs -text
+docs/showcase2.gif filter=lfs diff=lfs merge=lfs -text
diff --git a/subprocess/LivePortrait/pretrained_weights/.gitignore b/subprocess/LivePortrait/pretrained_weights/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e646a996685d1a4a487decd6dfb0022954b88b61
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/.gitignore
@@ -0,0 +1,18 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+**/__pycache__/
+*.py[cod]
+**/*.py[cod]
+*$py.class
+
+# Model weights
+#**/*.pth
+#**/*.onnx
+
+# Ipython notebook
+*.ipynb
+
+# Temporary files or benchmark resources
+animations/*
+tmp/*
+gradio_cached_examples/
diff --git a/subprocess/LivePortrait/pretrained_weights/README.md b/subprocess/LivePortrait/pretrained_weights/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..06fced1472a40110698cb3b3bebcf334f63e970f
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/README.md
@@ -0,0 +1,148 @@
+---
+license: mit
+---
+
+LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control
+
+
+
+
+
+
+ 1 Kuaishou Technology 2 University of Science and Technology of China 3 Fudan University
+
+
+
+
+
+
+
+
+
+ 🔥 For more results, visit our homepage 🔥
+
+
+
+
+## 🔥 Updates
+- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
+- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
+
+## Introduction
+This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
+We are actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PR) 💖.
+
+## 🔥 Getting Started
+### 1. Clone the code and prepare the environment
+```bash
+git clone https://github.com/KwaiVGI/LivePortrait
+cd LivePortrait
+
+# create env using conda
+conda create -n LivePortrait python==3.9.18
+conda activate LivePortrait
+# install dependencies with pip
+pip install -r requirements.txt
+```
+
+### 2. Download pretrained weights
+Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
+```text
+pretrained_weights
+├── insightface
+│ └── models
+│ └── buffalo_l
+│ ├── 2d106det.onnx
+│ └── det_10g.onnx
+└── liveportrait
+ ├── base_models
+ │ ├── appearance_feature_extractor.pth
+ │ ├── motion_extractor.pth
+ │ ├── spade_generator.pth
+ │ └── warping_module.pth
+ ├── landmark.onnx
+ └── retargeting_models
+ └── stitching_retargeting_module.pth
+```
+
+### 3. Inference 🚀
+
+```bash
+python inference.py
+```
+
+If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
+
+
+
+
+
+Or, you can change the input by specifying the `-s` and `-d` arguments:
+
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
+
+# or disable pasting back
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
+
+# more options to see
+python inference.py -h
+```
+
+**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊
+
+### 4. Gradio interface
+
+We also provide a Gradio interface for a better experience; simply run:
+
+```bash
+python app.py
+```
+
+### 5. Inference speed evaluation 🚀🚀🚀
+We have also provided a script to evaluate the inference speed of each module:
+
+```bash
+python speed.py
+```
+
+Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
+
+| Model | Parameters(M) | Model Size(MB) | Inference(ms) |
+|-----------------------------------|:-------------:|:--------------:|:-------------:|
+| Appearance Feature Extractor | 0.84 | 3.3 | 0.82 |
+| Motion Extractor | 28.12 | 108 | 0.84 |
+| Spade Generator | 55.37 | 212 | 7.59 |
+| Warping Module | 45.53 | 174 | 5.21 |
+| Stitching and Retargeting Modules| 0.23 | 2.3 | 0.31 |
+
+*Note: the listed values of Stitching and Retargeting Modules represent the combined parameter counts and the total sequential inference time of three MLP networks.*
+
+
+## Acknowledgements
+We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.
+
+## Citation 💖
+If you find LivePortrait useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
+```bibtex
+@article{guo2024live,
+ title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
+ author = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
+ year = {2024},
+ journal = {arXiv preprint arXiv:2407.03168},
+}
+```
diff --git a/subprocess/LivePortrait/pretrained_weights/docs/inference.gif b/subprocess/LivePortrait/pretrained_weights/docs/inference.gif
new file mode 100644
index 0000000000000000000000000000000000000000..7e18022e5245dcb6449df6d190b538d5ca024e06
Binary files /dev/null and b/subprocess/LivePortrait/pretrained_weights/docs/inference.gif differ
diff --git a/subprocess/LivePortrait/pretrained_weights/docs/showcase2.gif b/subprocess/LivePortrait/pretrained_weights/docs/showcase2.gif
new file mode 100644
index 0000000000000000000000000000000000000000..29175c0eeb85b9db0ffd61e3e9281dffe3536352
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/docs/showcase2.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb1fffb139681775780b2956e7d0289f55d199c1a3e14ab263887864d4b0d586
+size 2881351
diff --git a/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/2d106det.onnx b/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/2d106det.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cdb163d88b5f51396855ebc795e0114322c98b6b
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/2d106det.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+size 5030888
diff --git a/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/det_10g.onnx b/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/det_10g.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..aa586e034379fa5ea5babc8aa73d47afcd0fa6c2
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/insightface/models/buffalo_l/det_10g.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+size 16923827
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f05eb700c3eca1939c9d4e436bd063217eaa4587
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5279bb8654293dbdf327030b397f107237dd9212fb11dd75b83dfb635211ceb5
+size 3387959
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/motion_extractor.pth b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/motion_extractor.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a118cb8e26afc734be9abd4a6ef0163adcbd63b0
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/motion_extractor.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:251e6a94ad667a1d0c69526d292677165110ef7f0cf0f6d199f0e414e8aa0ca5
+size 112545506
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/spade_generator.pth b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/spade_generator.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0086702b84762790e06c5a4332f36d0857f594fc
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/spade_generator.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4780afc7909a9f84e24c01d73b31a555ef651521a1fe3b2429bd04534d992aee
+size 221813590
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/warping_module.pth b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/warping_module.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9d4cd1bcb62e2b654c28e32f66e56d51fb10389
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/base_models/warping_module.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f61a6f265fe344f14132364859a78bdbbc2068577170693da57fb96d636e282
+size 182180086
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/landmark.onnx b/subprocess/LivePortrait/pretrained_weights/liveportrait/landmark.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..48eb59185aa92b6efa2855ce99129d8aff248938
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/landmark.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31d22a5041326c31f19b78886939a634a5aedcaa5ab8b9b951a1167595d147db
+size 114666491
diff --git a/subprocess/LivePortrait/pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth b/subprocess/LivePortrait/pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59f0f3830b78b8587f0bd8b9ef8fd3ffdbd9290a
--- /dev/null
+++ b/subprocess/LivePortrait/pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3652d5a3f95099141a56986aaddec92fadf0a73c87a20fac9a2c07c32b28b611
+size 2393098
diff --git a/subprocess/LivePortrait/readme.md b/subprocess/LivePortrait/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..607637d57ecc99e8f78b8360c15fb4638c42f78d
--- /dev/null
+++ b/subprocess/LivePortrait/readme.md
@@ -0,0 +1,193 @@
+LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control
+
+
+
+
+
+
+ 1 Kuaishou Technology 2 University of Science and Technology of China 3 Fudan University
+
+
+
+
+
+
+
+
+
+ 🔥 For more results, visit our homepage 🔥
+
+
+
+
+## 🔥 Updates
+- **`2024/07/10`**: 💪 We support audio and video concatenating, driving video auto-cropping, and template making to protect privacy. More to see [here](assets/docs/changelog/2024-07-10.md).
+- **`2024/07/09`**: 🤗 We released the [HuggingFace Space](https://huggingface.co/spaces/KwaiVGI/liveportrait), thanks to the HF team and [Gradio](https://github.com/gradio-app/gradio)!
+- **`2024/07/04`**: 😊 We released the initial version of the inference code and models. Continuous updates, stay tuned!
+- **`2024/07/04`**: 🔥 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
+
+
+
+## Introduction
+This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
+We are actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PR) 💖.
+
+## 🔥 Getting Started
+### 1. Clone the code and prepare the environment
+```bash
+git clone https://github.com/KwaiVGI/LivePortrait
+cd LivePortrait
+
+# create env using conda
+conda create -n LivePortrait python==3.9.18
+conda activate LivePortrait
+# install dependencies with pip
+pip install -r requirements.txt
+```
+
+### 2. Download pretrained weights
+
+Download the pretrained weights from HuggingFace:
+```bash
+# you may need to run `git lfs install` first
+git clone https://huggingface.co/KwaiVGI/liveportrait pretrained_weights
+```
+
+Or, download all pretrained weights from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
+```text
+pretrained_weights
+├── insightface
+│ └── models
+│ └── buffalo_l
+│ ├── 2d106det.onnx
+│ └── det_10g.onnx
+└── liveportrait
+ ├── base_models
+ │ ├── appearance_feature_extractor.pth
+ │ ├── motion_extractor.pth
+ │ ├── spade_generator.pth
+ │ └── warping_module.pth
+ ├── landmark.onnx
+ └── retargeting_models
+ └── stitching_retargeting_module.pth
+```
+
+### 3. Inference 🚀
+
+#### Fast hands-on
+```bash
+python inference.py
+```
+
+If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
+
+
+
+
+
+Or, you can change the input by specifying the `-s` and `-d` arguments:
+
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
+
+# disable pasting back to run faster
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
+
+# more options to see
+python inference.py -h
+```
+
+#### Driving video auto-cropping
+
+📕 To use your own driving video, we **recommend**:
+ - Crop it to a **1:1** aspect ratio (e.g., 512x512 or 256x256 pixels), or enable auto-cropping by `--flag_crop_driving_video`.
+ - Focus on the head area, similar to the example videos.
+ - Minimize shoulder movement.
+ - Make sure the first frame of driving video is a frontal face with **neutral expression**.
+
+Below is an auto-cropping case using `--flag_crop_driving_video`:
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d13.mp4 --flag_crop_driving_video
+```
+
+If the auto-cropping results are not satisfactory, you can adjust the scale and offset with the `--scale_crop_video` and `--vy_ratio_crop_video` options, or crop the video manually.
+
+#### Template making
+You can also use the auto-generated `.pkl` template file to speed up inference and **protect privacy**, for example:
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d5.pkl
+```
+
+**Discover more interesting results on our [Homepage](https://liveportrait.github.io)** 😊
+
+### 4. Gradio interface 🤗
+
+We also provide a Gradio interface for a better experience; simply run:
+
+```bash
+python app.py
+```
+
+You can specify the `--server_port`, `--share`, `--server_name` arguments to satisfy your needs!
+
+**Or, try it out effortlessly on [HuggingFace](https://huggingface.co/spaces/KwaiVGI/LivePortrait) 🤗**
+
+### 5. Inference speed evaluation 🚀🚀🚀
+We have also provided a script to evaluate the inference speed of each module:
+
+```bash
+python speed.py
+```
+
+Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
+
+| Model | Parameters(M) | Model Size(MB) | Inference(ms) |
+|-----------------------------------|:-------------:|:--------------:|:-------------:|
+| Appearance Feature Extractor | 0.84 | 3.3 | 0.82 |
+| Motion Extractor | 28.12 | 108 | 0.84 |
+| Spade Generator | 55.37 | 212 | 7.59 |
+| Warping Module | 45.53 | 174 | 5.21 |
+| Stitching and Retargeting Modules | 0.23 | 2.3 | 0.31 |
+
+*Note: The values for the Stitching and Retargeting Modules represent the combined parameter counts and total inference time of three sequential MLP networks.*
+
+## Community Resources 🤗
+
+Discover the invaluable resources contributed by our community to enhance your LivePortrait experience:
+
+- [ComfyUI-LivePortraitKJ](https://github.com/kijai/ComfyUI-LivePortraitKJ) by [@kijai](https://github.com/kijai)
+- [comfyui-liveportrait](https://github.com/shadowcz007/comfyui-liveportrait) by [@shadowcz007](https://github.com/shadowcz007)
+- [LivePortrait hands-on tutorial](https://www.youtube.com/watch?v=uyjSTAOY7yI) by [@AI Search](https://www.youtube.com/@theAIsearch)
+- [ComfyUI tutorial](https://www.youtube.com/watch?v=8-IcDDmiUMM) by [@Sebastian Kamph](https://www.youtube.com/@sebastiankamph)
+- [LivePortrait In ComfyUI](https://www.youtube.com/watch?v=aFcS31OWMjE) by [@Benji](https://www.youtube.com/@TheFutureThinker)
+- [Replicate Playground](https://replicate.com/fofr/live-portrait) and [cog-comfyui](https://github.com/fofr/cog-comfyui) by [@fofr](https://github.com/fofr)
+
+And many more amazing contributions from our community!
+
+## Acknowledgements
+We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.
+
+## Citation 💖
+If you find LivePortrait useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
+```bibtex
+@article{guo2024liveportrait,
+ title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
+ author = {Guo, Jianzhu and Zhang, Dingyun and Liu, Xiaoqiang and Zhong, Zhizhou and Zhang, Yuan and Wan, Pengfei and Zhang, Di},
+ journal = {arXiv preprint arXiv:2407.03168},
+ year = {2024}
+}
+```
diff --git a/subprocess/LivePortrait/requirements.txt b/subprocess/LivePortrait/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b2e1c85102bfaa2873fe00b6ce030e847debee1b
--- /dev/null
+++ b/subprocess/LivePortrait/requirements.txt
@@ -0,0 +1,22 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.3.0
+torchvision==0.18.0
+torchaudio==2.3.0
+
+numpy==1.26.4
+pyyaml==6.0.1
+opencv-python==4.10.0.84
+scipy==1.13.1
+imageio==2.34.2
+lmdb==1.4.1
+tqdm==4.66.4
+rich==13.7.1
+ffmpeg-python==0.2.0
+onnxruntime-gpu==1.18.0
+onnx==1.16.1
+scikit-image==0.24.0
+albumentations==1.4.10
+matplotlib==3.9.0
+imageio-ffmpeg==0.5.1
+tyro==0.8.5
+gradio==4.37.1
diff --git a/subprocess/LivePortrait/speed.py b/subprocess/LivePortrait/speed.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cad2483a1eae9a7f89a73c961480404083038dd
--- /dev/null
+++ b/subprocess/LivePortrait/speed.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+
+"""
+Benchmark the inference speed of each module in LivePortrait.
+
+TODO: heavy GPT style, need to refactor
+"""
+
+import yaml
+import torch
+import time
+import numpy as np
+from src.utils.helper import load_model, concat_feat
+from src.config.inference_config import InferenceConfig
+
+
+def initialize_inputs(batch_size=1):
+ """
+ Generate random input tensors and move them to GPU
+
+ Each tensor is drawn with torch.randn, moved to CUDA and cast to FP16.
+ batch_size is used as the leading dimension of every tensor.
+
+ Returns a dict with the keys 'feature_3d', 'kp_source', 'kp_driving',
+ 'source_image', 'generator_input', 'feat_stitching', 'feat_eye' and 'feat_lip'.
+ """
+ feature_3d = torch.randn(batch_size, 32, 16, 64, 64).cuda().half()
+ kp_source = torch.randn(batch_size, 21, 3).cuda().half()
+ kp_driving = torch.randn(batch_size, 21, 3).cuda().half()
+ source_image = torch.randn(batch_size, 3, 256, 256).cuda().half()
+ generator_input = torch.randn(batch_size, 256, 64, 64).cuda().half()
+ # The two ratio tensors are only used to build feat_eye / feat_lip below;
+ # they are not part of the returned dict.
+ eye_close_ratio = torch.randn(batch_size, 3).cuda().half()
+ lip_close_ratio = torch.randn(batch_size, 2).cuda().half()
+ # concat_feat is imported from src.utils.helper; its output layout is not
+ # visible from this file.
+ feat_stitching = concat_feat(kp_source, kp_driving).half()
+ feat_eye = concat_feat(kp_source, eye_close_ratio).half()
+ feat_lip = concat_feat(kp_source, lip_close_ratio).half()
+
+ inputs = {
+ 'feature_3d': feature_3d,
+ 'kp_source': kp_source,
+ 'kp_driving': kp_driving,
+ 'source_image': source_image,
+ 'generator_input': generator_input,
+ 'feat_stitching': feat_stitching,
+ 'feat_eye': feat_eye,
+ 'feat_lip': feat_lip
+ }
+
+ return inputs
+
+
+def load_and_compile_models(cfg, model_config):
+ """
+ Load and compile models for inference
+
+ cfg supplies the checkpoint paths (checkpoint_F/M/W/G/S) and the device;
+ model_config is passed through unchanged to load_model.
+
+ Returns a tuple (compiled_models, stitching_retargeting_module):
+ - compiled_models maps a display name to the FP16, torch.compile'd model.
+ - stitching_retargeting_module is the object returned by load_model, with its
+ 'stitching', 'eye' and 'lip' entries replaced by their compiled FP16 versions.
+ """
+ appearance_feature_extractor = load_model(cfg.checkpoint_F, model_config, cfg.device, 'appearance_feature_extractor')
+ motion_extractor = load_model(cfg.checkpoint_M, model_config, cfg.device, 'motion_extractor')
+ warping_module = load_model(cfg.checkpoint_W, model_config, cfg.device, 'warping_module')
+ spade_generator = load_model(cfg.checkpoint_G, model_config, cfg.device, 'spade_generator')
+ stitching_retargeting_module = load_model(cfg.checkpoint_S, model_config, cfg.device, 'stitching_retargeting_module')
+
+ # Display names used as keys by the warm-up, timing and reporting functions.
+ models_with_params = [
+ ('Appearance Feature Extractor', appearance_feature_extractor),
+ ('Motion Extractor', motion_extractor),
+ ('Warping Network', warping_module),
+ ('SPADE Decoder', spade_generator)
+ ]
+
+ compiled_models = {}
+ for name, model in models_with_params:
+ model = model.half()
+ model = torch.compile(model, mode='max-autotune') # Optimize for inference
+ model.eval() # Switch to evaluation mode
+ compiled_models[name] = model
+
+ # The stitching/retargeting checkpoint is indexed by sub-module name; each
+ # entry is compiled separately and written back under the same key.
+ retargeting_models = ['stitching', 'eye', 'lip']
+ for retarget in retargeting_models:
+ module = stitching_retargeting_module[retarget].half()
+ module = torch.compile(module, mode='max-autotune') # Optimize for inference
+ module.eval() # Switch to evaluation mode
+ stitching_retargeting_module[retarget] = module
+
+ return compiled_models, stitching_retargeting_module
+
+
+def warm_up_models(compiled_models, stitching_retargeting_module, inputs):
+ """
+ Warm up models to prepare them for benchmarking
+
+ Calls the four compiled models and the three retargeting modules on the
+ prepared inputs with gradients disabled, using a 10-iteration loop.
+ Outputs are discarded; nothing is returned.
+ """
+ print("Warm up start!")
+ with torch.no_grad():
+ for _ in range(10):
+ compiled_models['Appearance Feature Extractor'](inputs['source_image'])
+ compiled_models['Motion Extractor'](inputs['source_image'])
+ compiled_models['Warping Network'](inputs['feature_3d'], inputs['kp_driving'], inputs['kp_source'])
+ compiled_models['SPADE Decoder'](inputs['generator_input']) # Adjust input as required
+ stitching_retargeting_module['stitching'](inputs['feat_stitching'])
+ stitching_retargeting_module['eye'](inputs['feat_eye'])
+ stitching_retargeting_module['lip'](inputs['feat_lip'])
+ print("Warm up end!")
+
+
+def measure_inference_times(compiled_models, stitching_retargeting_module, inputs):
+ """
+ Measure inference times for each model
+
+ Uses a 100-iteration loop with gradients disabled. Durations are
+ time.time() differences, i.e. wall-clock seconds.
+
+ Returns a tuple (times, overall_times):
+ - times maps each key of compiled_models, plus 'Retargeting Models', to a
+ list of per-iteration durations.
+ - overall_times is a list of durations measured from overall_start.
+ """
+ times = {name: [] for name in compiled_models.keys()}
+ times['Retargeting Models'] = []
+
+ overall_times = []
+
+ with torch.no_grad():
+ for _ in range(100):
+ # Synchronize first so earlier queued GPU work is not counted.
+ torch.cuda.synchronize()
+ overall_start = time.time()
+
+ # Each section synchronizes before reading the stop time, so the
+ # interval ends only after the queued GPU work has finished.
+ start = time.time()
+ compiled_models['Appearance Feature Extractor'](inputs['source_image'])
+ torch.cuda.synchronize()
+ times['Appearance Feature Extractor'].append(time.time() - start)
+
+ start = time.time()
+ compiled_models['Motion Extractor'](inputs['source_image'])
+ torch.cuda.synchronize()
+ times['Motion Extractor'].append(time.time() - start)
+
+ start = time.time()
+ compiled_models['Warping Network'](inputs['feature_3d'], inputs['kp_driving'], inputs['kp_source'])
+ torch.cuda.synchronize()
+ times['Warping Network'].append(time.time() - start)
+
+ start = time.time()
+ compiled_models['SPADE Decoder'](inputs['generator_input']) # Adjust input as required
+ torch.cuda.synchronize()
+ times['SPADE Decoder'].append(time.time() - start)
+
+ # The three retargeting modules are timed together as one entry.
+ start = time.time()
+ stitching_retargeting_module['stitching'](inputs['feat_stitching'])
+ stitching_retargeting_module['eye'](inputs['feat_eye'])
+ stitching_retargeting_module['lip'](inputs['feat_lip'])
+ torch.cuda.synchronize()
+ times['Retargeting Models'].append(time.time() - start)
+
+ overall_times.append(time.time() - overall_start)
+
+ return times, overall_times
+
+
+def print_benchmark_results(compiled_models, stitching_retargeting_module, retargeting_models, times, overall_times):
+ """
+ Print benchmark results with average and standard deviation of inference times
+
+ compiled_models: mapping of display name to model; parameter counts are printed for each.
+ stitching_retargeting_module: indexed by each entry of retargeting_models to count parameters.
+ retargeting_models: iterable of keys into stitching_retargeting_module.
+ times: mapping of name to a list of durations; values are multiplied by 1000 before printing as ms.
+ overall_times: accepted but not read anywhere in this function.
+
+ Prints to stdout and returns nothing.
+ """
+ average_times = {name: np.mean(times[name]) * 1000 for name in times.keys()}
+ std_times = {name: np.std(times[name]) * 1000 for name in times.keys()}
+
+ for name, model in compiled_models.items():
+ num_params = sum(p.numel() for p in model.parameters())
+ num_params_in_millions = num_params / 1e6
+ print(f"Number of parameters for {name}: {num_params_in_millions:.2f} M")
+
+ for index, retarget in enumerate(retargeting_models):
+ num_params = sum(p.numel() for p in stitching_retargeting_module[retarget].parameters())
+ num_params_in_millions = num_params / 1e6
+ print(f"Number of parameters for part_{index} in Stitching and Retargeting Modules: {num_params_in_millions:.2f} M")
+
+ # NOTE(review): "100 runs" is hard-coded in the message; it is not derived
+ # from the length of the lists in times.
+ for name, avg_time in average_times.items():
+ std_time = std_times[name]
+ print(f"Average inference time for {name} over 100 runs: {avg_time:.2f} ms (std: {std_time:.2f} ms)")
+
+
+def main():
+ """
+ Main function to benchmark speed and model parameters
+
+ Builds the random inputs, reads the model config YAML referenced by
+ InferenceConfig, loads and compiles the models, warms them up, measures
+ the inference times and prints the results.
+ """
+ # Sample input tensors
+ inputs = initialize_inputs()
+
+ # Load configuration
+ cfg = InferenceConfig(device_id=0)
+ model_config_path = cfg.models_config
+ with open(model_config_path, 'r') as file:
+ model_config = yaml.safe_load(file)
+
+ # Load and compile models
+ compiled_models, stitching_retargeting_module = load_and_compile_models(cfg, model_config)
+
+ # Warm up models
+ warm_up_models(compiled_models, stitching_retargeting_module, inputs)
+
+ # Measure inference times
+ times, overall_times = measure_inference_times(compiled_models, stitching_retargeting_module, inputs)
+
+ # Print benchmark results
+ print_benchmark_results(compiled_models, stitching_retargeting_module, ['stitching', 'eye', 'lip'], times, overall_times)
+
+
+# Run the benchmark only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-310.pyc b/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..813564ec8ca0b90a69b2e8aefe747fb594a1109c
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-39.pyc b/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..33d2acb7162450f596fe784d4607efaf255a9a6c
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/gradio_pipeline.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-310.pyc b/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9c7c5d1d57a2ea5fb2552a49a4639cd8496b15b
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-39.pyc b/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d60b0a620e4cc0e9c618d9f90da8e0521e605c66
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/live_portrait_pipeline.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-310.pyc b/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fe9a0162bc65096c38c2a9fbe4272e6dc0bffbd0
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-39.pyc b/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..22ed74ba11548f236fceef01a74367129ad40e2d
Binary files /dev/null and b/subprocess/LivePortrait/src/__pycache__/live_portrait_wrapper.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__init__.py b/subprocess/LivePortrait/src/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..455f96f26cfe6d2a81fa09ef0813714d2538b3b0
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..474f67705bd1e59ad812f03611f384d9431fd70e
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-310.pyc b/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ecb6ba4656b0c5e8e2ab3f41c4f490a3c7747c8
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-39.pyc b/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ee31b9ad4a955b50d14f1d69124c658d7ae25f5b
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/argument_config.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-310.pyc b/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e8d01e8b98e40320ed3369f8c139030918a4e22f
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-39.pyc b/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6463b2addaa81a3ddd3321f254030bf8057d31d7
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/base_config.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-310.pyc b/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dd36d7ceee25ef8538eda5b34782dea3d6af26bf
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-39.pyc b/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b5496a1b79fa2cf2b80a5f9aac68f2b257364b11
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/crop_config.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-310.pyc b/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d4ad3ce4da41810eb7a9693baa460883451788c
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-39.pyc b/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8bec8296a8365ee91c83b6aa981acb99a4080404
Binary files /dev/null and b/subprocess/LivePortrait/src/config/__pycache__/inference_config.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/config/argument_config.py b/subprocess/LivePortrait/src/config/argument_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bbaa201ccc42631aa9b4681c9ccadf8e8379983
--- /dev/null
+++ b/subprocess/LivePortrait/src/config/argument_config.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+
+"""
+All configs for user
+"""
+
+from dataclasses import dataclass
+import tyro
+from typing_extensions import Annotated
+from typing import Optional
+from .base_config import PrintableConfig, make_abs_path
+
+
+@dataclass(repr=False) # use repr from PrintableConfig
+class ArgumentConfig(PrintableConfig):
+ ########## input arguments ##########
+ source_image: Annotated[str, tyro.conf.arg(aliases=["-s"])] = make_abs_path('../../assets/examples/source/s6.jpg') # path to the source portrait
+ driving_info: Annotated[str, tyro.conf.arg(aliases=["-d"])] = make_abs_path('../../assets/examples/driving/d0.mp4') # path to driving video or template (.pkl format)
+ output_dir: Annotated[str, tyro.conf.arg(aliases=["-o"])] = 'animations/' # directory to save output video
+
+ ########## inference arguments ##########
+ flag_use_half_precision: bool = True # whether to use half precision (FP16). If black boxes appear, it might be due to GPU incompatibility; set to False.
+ flag_crop_driving_video: bool = False # whether to crop the driving video, if the given driving info is a video
+ device_id: int = 0 # GPU device id
+ flag_force_cpu: bool = False # force CPU inference, WIP!
+ flag_lip_zero : bool = True # whether to set the lips to a closed state before animation; only takes effect when flag_eye_retargeting and flag_lip_retargeting are both False
+ flag_eye_retargeting: bool = False # not recommended to be True, WIP
+ flag_lip_retargeting: bool = False # not recommended to be True, WIP
+ flag_stitching: bool = True # recommended to be True if head movement is small, False if head movement is large
+ flag_relative_motion: bool = True # whether to use relative motion
+ flag_pasteback: bool = True # whether to paste-back/stitch the animated face cropping from the face-cropping space to the original image space
+ flag_do_crop: bool = True # whether to crop the source portrait to the face-cropping space
+ flag_do_rot: bool = True # whether to conduct the rotation when flag_do_crop is True
+
+ ########## crop arguments ##########
+ scale: float = 2.3 # the ratio of face area is smaller if scale is larger
+ vx_ratio: float = 0 # the ratio to move the face left or right in cropping space
+ vy_ratio: float = -0.125 # the ratio to move the face up or down in cropping space
+
+ scale_crop_video: float = 2.2 # scale factor for cropping video
+ vx_ratio_crop_video: float = 0. # adjust x offset (cf. vx_ratio: left/right)
+ vy_ratio_crop_video: float = -0.1 # adjust y offset (cf. vy_ratio: up/down)
+
+ ########## gradio arguments ##########
+ server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])] = 8890 # port for gradio server
+ share: bool = False # whether to share the server to public
+ server_name: Optional[str] = "127.0.0.1" # set the local server name, "0.0.0.0" to broadcast all
diff --git a/subprocess/LivePortrait/src/config/base_config.py b/subprocess/LivePortrait/src/config/base_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..216b8be50aecc8af4b9d1d2a9401e034dd7769e4
--- /dev/null
+++ b/subprocess/LivePortrait/src/config/base_config.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+
+"""
+pretty printing class
+"""
+
+from __future__ import annotations
+import os.path as osp
+from typing import Tuple
+
+
+def make_abs_path(fn): # join fn onto the directory that contains this source file
+ return osp.join(osp.dirname(osp.realpath(__file__)), fn) # realpath resolves symlinks before taking the directory
+
+
+class PrintableConfig: # pylint: disable=too-few-public-methods
+ """Base class whose __repr__ lists every instance attribute as "key: value", one per line, under the class name"""
+
+ def __repr__(self):
+ lines = [self.__class__.__name__ + ":"] # header line: the concrete subclass name
+ for key, val in vars(self).items():
+ if isinstance(val, Tuple): # tuples are rendered as "[...]" with one item per line
+ flattened_val = "["
+ for item in val:
+ flattened_val += str(item) + "\n"
+ flattened_val = flattened_val.rstrip("\n") # drop the newline left after the last item
+ val = flattened_val + "]"
+ lines += f"{key}: {str(val)}".split("\n") # a multi-line value contributes one entry per line
+ return "\n ".join(lines)
diff --git a/subprocess/LivePortrait/src/config/crop_config.py b/subprocess/LivePortrait/src/config/crop_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4c8b12c4528409dabafcc593f7d86daf7e9c2a4
--- /dev/null
+++ b/subprocess/LivePortrait/src/config/crop_config.py
@@ -0,0 +1,28 @@
+# coding: utf-8
+
+"""
+parameters used for crop faces
+"""
+
+import os.path as osp
+from dataclasses import dataclass
+from typing import Union, List
+from .base_config import PrintableConfig
+
+
+@dataclass(repr=False) # use repr from PrintableConfig
+class CropConfig(PrintableConfig):
+ device_id: int = 0 # GPU device id
+ flag_force_cpu: bool = False # force CPU inference, WIP
+ ########## source image cropping option ##########
+ dsize: int = 512 # crop size
+ scale: float = 2.5 # scale factor
+ vx_ratio: float = 0 # vx ratio
+ vy_ratio: float = -0.125 # vy ratio +up, -down
+ max_face_num: int = 0 # max face number, 0 means no limit
+
+ ########## driving video auto cropping option ##########
+ scale_crop_video: float = 2.2 #2.0 # scale factor for cropping video
+ vx_ratio_crop_video: float = 0. # adjust x offset -- NOTE(review): vx presumably horizontal; confirm against the cropper
+ vy_ratio_crop_video: float = -0.1 # adjust y offset -- NOTE(review): vy presumably vertical; confirm against the cropper
+ direction: str = 'large-small' # direction of cropping
diff --git a/subprocess/LivePortrait/src/config/inference_config.py b/subprocess/LivePortrait/src/config/inference_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..70eedd81d42a3958e05cb2124ded7a79be5649a3
--- /dev/null
+++ b/subprocess/LivePortrait/src/config/inference_config.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+
+"""
+config dataclass used for inference
+"""
+
+import os.path as osp
+import cv2
+from numpy import ndarray
+from dataclasses import dataclass
+from typing import Literal, Tuple
+from .base_config import PrintableConfig, make_abs_path
+
+
+@dataclass(repr=False) # use repr from PrintableConfig
+class InferenceConfig(PrintableConfig):
+ # MODEL CONFIG, NOT EXPORTED PARAMS
+ models_config: str = make_abs_path('./models.yaml') # portrait animation config
+ checkpoint_F: str = make_abs_path('../../pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth') # path to checkpoint of F
+ checkpoint_M: str = make_abs_path('../../pretrained_weights/liveportrait/base_models/motion_extractor.pth') # path to checkpoint of M
+ checkpoint_G: str = make_abs_path('../../pretrained_weights/liveportrait/base_models/spade_generator.pth') # path to checkpoint of G
+ checkpoint_W: str = make_abs_path('../../pretrained_weights/liveportrait/base_models/warping_module.pth') # path to checkpoint of W
+ checkpoint_S: str = make_abs_path('../../pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth') # path to checkpoint of S and R_eyes, R_lip
+
+ # EXPORTED PARAMS
+ flag_use_half_precision: bool = True
+ flag_crop_driving_video: bool = False
+ device_id: int = 0
+ flag_lip_zero: bool = True
+ flag_eye_retargeting: bool = False
+ flag_lip_retargeting: bool = False
+ flag_stitching: bool = True
+ flag_relative_motion: bool = True
+ flag_pasteback: bool = True
+ flag_do_crop: bool = True
+ flag_do_rot: bool = True
+ flag_force_cpu: bool = False
+
+ # NOT EXPORTED PARAMS
+ lip_zero_threshold: float = 0.03 # threshold for flag_lip_zero
+ anchor_frame: int = 0 # TO IMPLEMENT
+
+ input_shape: Tuple[int, int] = (256, 256) # input shape
+ output_format: Literal['mp4', 'gif'] = 'mp4' # output video format
+ crf: int = 15 # crf for output video
+ output_fps: int = 25 # default output fps
+
+ mask_crop: ndarray = cv2.imread(make_abs_path('../utils/resources/mask_template.png'), cv2.IMREAD_COLOR) # NOTE: evaluated once in the class body (i.e. at import); every instance that keeps the default shares this one array
+ size_gif: int = 256 # default gif size, TO IMPLEMENT
+ source_max_dim: int = 1280 # the max dim of height and width of source image
+ source_division: int = 2 # make sure the height and width of source image can be divided by this number
diff --git a/subprocess/LivePortrait/src/config/models.yaml b/subprocess/LivePortrait/src/config/models.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..131d1c65025c31e37af9239e211ea14454128a2e
--- /dev/null
+++ b/subprocess/LivePortrait/src/config/models.yaml
@@ -0,0 +1,43 @@
+model_params:
+ appearance_feature_extractor_params: # the F in the paper
+ image_channel: 3
+ block_expansion: 64
+ num_down_blocks: 2
+ max_features: 512
+ reshape_channel: 32
+ reshape_depth: 16
+ num_resblocks: 6
+ motion_extractor_params: # the M in the paper
+ num_kp: 21
+ backbone: convnextv2_tiny
+ warping_module_params: # the W in the paper
+ num_kp: 21
+ block_expansion: 64
+ max_features: 512
+ num_down_blocks: 2
+ reshape_channel: 32
+ estimate_occlusion_map: True
+ dense_motion_params:
+ block_expansion: 32
+ max_features: 1024
+ num_blocks: 5
+ reshape_depth: 16
+ compress: 4
+ spade_generator_params: # the G in the paper
+ upscale: 2 # represents upsample factor 256x256 -> 512x512
+ block_expansion: 64
+ max_features: 512
+ num_down_blocks: 2
+ stitching_retargeting_module_params: # the S in the paper
+ stitching:
+ input_size: 126 # (21*3)*2
+ hidden_sizes: [128, 128, 64]
+ output_size: 65 # (21*3)+2(tx,ty)
+ lip:
+ input_size: 65 # (21*3)+2
+ hidden_sizes: [128, 128, 64]
+ output_size: 63 # (21*3)
+ eye:
+ input_size: 66 # (21*3)+3
+ hidden_sizes: [256, 256, 128, 128, 64]
+ output_size: 63 # (21*3)
diff --git a/subprocess/LivePortrait/src/gradio_pipeline.py b/subprocess/LivePortrait/src/gradio_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7343f7df6b8a6c6815c5af3526ed6dc857a7c0c
--- /dev/null
+++ b/subprocess/LivePortrait/src/gradio_pipeline.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+
+"""
+Pipeline for gradio
+"""
+import gradio as gr
+
+from .config.argument_config import ArgumentConfig
+from .live_portrait_pipeline import LivePortraitPipeline
+from .utils.io import load_img_online
+from .utils.rprint import rlog as log
+from .utils.crop import prepare_paste_back, paste_back
+from .utils.camera import get_rotation_matrix
+
+
+def update_args(args, user_args):
+ """Update args in place from the user_args mapping and return it; keys that are not existing attributes of args are skipped
+ """
+ for k, v in user_args.items():
+ if hasattr(args, k): # silently ignores unknown keys, so a misspelled key has no effect
+ setattr(args, k, v)
+ return args
+
+
+class GradioPipeline(LivePortraitPipeline):
+ """LivePortraitPipeline with entry points for the Gradio UI; failures are reported by raising gr.Error"""
+ def __init__(self, inference_cfg, crop_cfg, args: ArgumentConfig):
+ super().__init__(inference_cfg, crop_cfg)
+ # args is mutated in place by update_args() on every execute_video call
+ self.args = args
+
+ def execute_video(
+ self,
+ input_image_path,
+ input_video_path,
+ flag_relative_input,
+ flag_do_crop_input,
+ flag_remap_input,
+ flag_crop_driving_video_input
+ ):
+ """ for video driven portrait animation; returns (video_path, video_path_concat) or raises gr.Error if an input is missing
+ """
+ if input_image_path is not None and input_video_path is not None:
+ args_user = {
+ 'source_image': input_image_path,
+ 'driving_info': input_video_path,
+ 'flag_relative_motion': flag_relative_input, # must match the ArgumentConfig field name: update_args() skips keys that are not attributes
+ 'flag_do_crop': flag_do_crop_input,
+ 'flag_pasteback': flag_remap_input,
+ 'flag_crop_driving_video': flag_crop_driving_video_input
+ }
+ # update config from user input
+ self.args = update_args(self.args, args_user)
+ self.live_portrait_wrapper.update_config(self.args.__dict__)
+ self.cropper.update_config(self.args.__dict__)
+ # video driven animation
+ video_path, video_path_concat = self.execute(self.args)
+ gr.Info("Run successfully!", duration=2)
+ return video_path, video_path_concat,
+ else:
+ raise gr.Error("The input source portrait or driving video hasn't been prepared yet 💥!", duration=5)
+
+ def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop=True):
+ """ for single image retargeting; returns (retargeted crop, retargeted crop pasted back onto the original image)
+ """
+ # disposable feature
+ f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
+ self.prepare_retargeting(input_image, flag_do_crop)
+
+ if input_eye_ratio is None or input_lip_ratio is None:
+ raise gr.Error("Invalid ratio input 💥!", duration=5)
+ else:
+ inference_cfg = self.live_portrait_wrapper.inference_cfg
+ x_s_user = x_s_user.to(self.live_portrait_wrapper.device)
+ f_s_user = f_s_user.to(self.live_portrait_wrapper.device)
+ # ∆_eyes,i = R_eyes(x_s; c_s,eyes, c_d,eyes,i)
+ combined_eye_ratio_tensor = self.live_portrait_wrapper.calc_combined_eye_ratio([[input_eye_ratio]], source_lmk_user)
+ eyes_delta = self.live_portrait_wrapper.retarget_eye(x_s_user, combined_eye_ratio_tensor)
+ # ∆_lip,i = R_lip(x_s; c_s,lip, c_d,lip,i)
+ combined_lip_ratio_tensor = self.live_portrait_wrapper.calc_combined_lip_ratio([[input_lip_ratio]], source_lmk_user)
+ lip_delta = self.live_portrait_wrapper.retarget_lip(x_s_user, combined_lip_ratio_tensor)
+ num_kp = x_s_user.shape[1]
+ # default: use x_s
+ x_d_new = x_s_user + eyes_delta.reshape(-1, num_kp, 3) + lip_delta.reshape(-1, num_kp, 3)
+ # D(W(f_s; x_s, x′_d))
+ out = self.live_portrait_wrapper.warp_decode(f_s_user, x_s_user, x_d_new)
+ out = self.live_portrait_wrapper.parse_output(out['out'])[0]
+ out_to_ori_blend = paste_back(out, crop_M_c2o, img_rgb, mask_ori)
+ gr.Info("Run successfully!", duration=2)
+ return out, out_to_ori_blend
+
+ def prepare_retargeting(self, input_image, flag_do_crop=True):
+ """ for single image retargeting; returns (f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb) or raises gr.Error
+ """
+ if input_image is not None:
+ inference_cfg = self.live_portrait_wrapper.inference_cfg
+ ######## process source portrait ########
+ img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
+ log(f"Load source image from {input_image}.")
+ crop_info = self.cropper.crop_source_image(img_rgb, self.cropper.crop_cfg)
+ if crop_info is None: # same no-face guard as LivePortraitPipeline.execute, surfaced as a UI error
+ raise gr.Error("No face detected in the source image 💥!", duration=5)
+ if flag_do_crop:
+ I_s = self.live_portrait_wrapper.prepare_source(crop_info['img_crop_256x256'])
+ else:
+ I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
+ x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
+ R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
+ f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
+ x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
+ source_lmk_user = crop_info['lmk_crop']
+ crop_M_c2o = crop_info['M_c2o']
+ mask_ori = prepare_paste_back(inference_cfg.mask_crop, crop_info['M_c2o'], dsize=(img_rgb.shape[1], img_rgb.shape[0]))
+ return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
+ else:
+ # when press the clear button, go here
+ raise gr.Error("The retargeting input hasn't been prepared yet 💥!", duration=5)
diff --git a/subprocess/LivePortrait/src/live_portrait_pipeline.py b/subprocess/LivePortrait/src/live_portrait_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..a27b52f82758583795a6780f0c83487f55366ac1
--- /dev/null
+++ b/subprocess/LivePortrait/src/live_portrait_pipeline.py
@@ -0,0 +1,285 @@
+# coding: utf-8
+
+"""
+Pipeline of LivePortrait
+"""
+
+import torch
+torch.backends.cudnn.benchmark = True # disable CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR warning
+
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+import numpy as np
+import os
+import os.path as osp
+from rich.progress import track
+
+from .config.argument_config import ArgumentConfig
+from .config.inference_config import InferenceConfig
+from .config.crop_config import CropConfig
+from .utils.cropper import Cropper
+from .utils.camera import get_rotation_matrix
+from .utils.video import images2video, concat_frames, get_fps, add_audio_to_video, has_audio_stream
+from .utils.crop import _transform_img, prepare_paste_back, paste_back
+from .utils.io import load_image_rgb, load_driving_info, resize_to_limit, dump, load
+from .utils.helper import mkdir, basename, dct2device, is_video, is_template, remove_suffix
+from .utils.rprint import rlog as log
+# from .utils.viz import viz_lmk
+from .live_portrait_wrapper import LivePortraitWrapper
+
+
+def make_abs_path(fn): # join fn onto the directory that contains this source file
+ return osp.join(osp.dirname(osp.realpath(__file__)), fn) # realpath resolves symlinks before taking the directory
+
+
+class LivePortraitPipeline(object):
+ """Animation pipeline: crops the source portrait, loads or builds a driving motion template, then renders and saves the videos"""
+ def __init__(self, inference_cfg: InferenceConfig, crop_cfg: CropConfig):
+ self.live_portrait_wrapper: LivePortraitWrapper = LivePortraitWrapper(inference_cfg=inference_cfg)
+ self.cropper: Cropper = Cropper(crop_cfg=crop_cfg)
+
+ def execute(self, args: ArgumentConfig):
+ """Animate args.source_image with args.driving_info, write the videos into args.output_dir and return (wfp, wfp_concat)"""
+ inf_cfg = self.live_portrait_wrapper.inference_cfg
+ device = self.live_portrait_wrapper.device
+ crop_cfg = self.cropper.crop_cfg
+
+ ######## process source portrait ########
+ img_rgb = load_image_rgb(args.source_image)
+ img_rgb = resize_to_limit(img_rgb, inf_cfg.source_max_dim, inf_cfg.source_division)
+ log(f"Load source image from {args.source_image}")
+
+ crop_info = self.cropper.crop_source_image(img_rgb, crop_cfg)
+ if crop_info is None:
+ raise Exception("No face detected in the source image!")
+ source_lmk = crop_info['lmk_crop']
+ img_crop, img_crop_256x256 = crop_info['img_crop'], crop_info['img_crop_256x256']
+
+ if inf_cfg.flag_do_crop:
+ I_s = self.live_portrait_wrapper.prepare_source(img_crop_256x256)
+ else:
+ img_crop_256x256 = cv2.resize(img_rgb, (256, 256)) # force to resize to 256x256
+ I_s = self.live_portrait_wrapper.prepare_source(img_crop_256x256)
+ x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
+ x_c_s = x_s_info['kp']
+ R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
+ f_s = self.live_portrait_wrapper.extract_feature_3d(I_s)
+ x_s = self.live_portrait_wrapper.transform_keypoint(x_s_info)
+
+ flag_lip_zero = inf_cfg.flag_lip_zero # not overwrite
+ if flag_lip_zero:
+ # let lip-open scalar to be 0 at first
+ c_d_lip_before_animation = [0.]
+ combined_lip_ratio_tensor_before_animation = self.live_portrait_wrapper.calc_combined_lip_ratio(c_d_lip_before_animation, source_lmk)
+ if combined_lip_ratio_tensor_before_animation[0][0] < inf_cfg.lip_zero_threshold:
+ flag_lip_zero = False
+ else:
+ lip_delta_before_animation = self.live_portrait_wrapper.retarget_lip(x_s, combined_lip_ratio_tensor_before_animation)
+ ############################################
+
+ ######## process driving info ########
+ flag_load_from_template = is_template(args.driving_info)
+ driving_rgb_crop_256x256_lst = None
+ wfp_template = None
+
+ if flag_load_from_template:
+ # NOTE: load from template, it is fast, but the cropping video is None
+ log(f"Load from template: {args.driving_info}, NOT the video, so the cropping video and audio are both NULL.", style='bold green')
+ template_dct = load(args.driving_info)
+ n_frames = template_dct['n_frames']
+ c_d_eyes_lst, c_d_lip_lst = template_dct.get('c_d_eyes_lst'), template_dct.get('c_d_lip_lst') # read by eye/lip retargeting in the animation loop; None if the template lacks them
+ # set output_fps
+ output_fps = template_dct.get('output_fps', inf_cfg.output_fps)
+ log(f'The FPS of template: {output_fps}')
+
+ if args.flag_crop_driving_video:
+ log("Warning: flag_crop_driving_video is True, but the driving info is a template, so it is ignored.")
+
+ elif osp.exists(args.driving_info) and is_video(args.driving_info):
+ # load from video file, AND make motion template
+ log(f"Load video: {args.driving_info}")
+ if osp.isdir(args.driving_info):
+ output_fps = inf_cfg.output_fps
+ else:
+ output_fps = int(get_fps(args.driving_info))
+ log(f'The FPS of {args.driving_info} is: {output_fps}')
+
+ log(f"Load video file (mp4 mov avi etc...): {args.driving_info}")
+ driving_rgb_lst = load_driving_info(args.driving_info)
+
+ ######## make motion template ########
+ log("Start making motion template...")
+ if inf_cfg.flag_crop_driving_video:
+ ret = self.cropper.crop_driving_video(driving_rgb_lst)
+ log(f'Driving video is cropped, {len(ret["frame_crop_lst"])} frames are processed.')
+ driving_rgb_crop_lst, driving_lmk_crop_lst = ret['frame_crop_lst'], ret['lmk_crop_lst']
+ driving_rgb_crop_256x256_lst = [cv2.resize(_, (256, 256)) for _ in driving_rgb_crop_lst]
+ else:
+ driving_lmk_crop_lst = self.cropper.calc_lmks_from_cropped_video(driving_rgb_lst)
+ driving_rgb_crop_256x256_lst = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst] # force to resize to 256x256
+
+ c_d_eyes_lst, c_d_lip_lst = self.live_portrait_wrapper.calc_driving_ratio(driving_lmk_crop_lst)
+ # save the motion template
+ I_d_lst = self.live_portrait_wrapper.prepare_driving_videos(driving_rgb_crop_256x256_lst)
+ template_dct = self.make_motion_template(I_d_lst, c_d_eyes_lst, c_d_lip_lst, output_fps=output_fps)
+
+ wfp_template = remove_suffix(args.driving_info) + '.pkl'
+ dump(wfp_template, template_dct)
+ log(f"Dump motion template to {wfp_template}")
+
+ n_frames = I_d_lst.shape[0]
+ else:
+ raise Exception(f"{args.driving_info} not exists or unsupported driving info types!")
+ #########################################
+
+ ######## prepare for pasteback ########
+ I_p_pstbk_lst = None
+ if inf_cfg.flag_pasteback and inf_cfg.flag_do_crop and inf_cfg.flag_stitching:
+ mask_ori_float = prepare_paste_back(inf_cfg.mask_crop, crop_info['M_c2o'], dsize=(img_rgb.shape[1], img_rgb.shape[0]))
+ I_p_pstbk_lst = []
+ log("Prepared pasteback mask done.")
+ #########################################
+
+ I_p_lst = []
+ R_d_0, x_d_0_info = None, None
+
+ for i in track(range(n_frames), description='🚀Animating...', total=n_frames):
+ x_d_i_info = template_dct['motion'][i]
+ x_d_i_info = dct2device(x_d_i_info, device)
+ R_d_i = x_d_i_info['R_d']
+
+ if i == 0:
+ R_d_0 = R_d_i
+ x_d_0_info = x_d_i_info
+
+ if inf_cfg.flag_relative_motion:
+ R_new = (R_d_i @ R_d_0.permute(0, 2, 1)) @ R_s
+ delta_new = x_s_info['exp'] + (x_d_i_info['exp'] - x_d_0_info['exp'])
+ scale_new = x_s_info['scale'] * (x_d_i_info['scale'] / x_d_0_info['scale'])
+ t_new = x_s_info['t'] + (x_d_i_info['t'] - x_d_0_info['t'])
+ else:
+ R_new = R_d_i
+ delta_new = x_d_i_info['exp']
+ scale_new = x_s_info['scale']
+ t_new = x_d_i_info['t']
+
+ t_new[..., 2].fill_(0) # zero tz
+ x_d_i_new = scale_new * (x_c_s @ R_new + delta_new) + t_new
+
+ # Algorithm 1:
+ if not inf_cfg.flag_stitching and not inf_cfg.flag_eye_retargeting and not inf_cfg.flag_lip_retargeting:
+ # without stitching or retargeting
+ if flag_lip_zero:
+ x_d_i_new += lip_delta_before_animation.reshape(-1, x_s.shape[1], 3)
+ else:
+ pass
+ elif inf_cfg.flag_stitching and not inf_cfg.flag_eye_retargeting and not inf_cfg.flag_lip_retargeting:
+ # with stitching and without retargeting
+ if flag_lip_zero:
+ x_d_i_new = self.live_portrait_wrapper.stitching(x_s, x_d_i_new) + lip_delta_before_animation.reshape(-1, x_s.shape[1], 3)
+ else:
+ x_d_i_new = self.live_portrait_wrapper.stitching(x_s, x_d_i_new)
+ else:
+ eyes_delta, lip_delta = None, None
+ if inf_cfg.flag_eye_retargeting:
+ c_d_eyes_i = c_d_eyes_lst[i]
+ combined_eye_ratio_tensor = self.live_portrait_wrapper.calc_combined_eye_ratio(c_d_eyes_i, source_lmk)
+ # ∆_eyes,i = R_eyes(x_s; c_s,eyes, c_d,eyes,i)
+ eyes_delta = self.live_portrait_wrapper.retarget_eye(x_s, combined_eye_ratio_tensor)
+ if inf_cfg.flag_lip_retargeting:
+ c_d_lip_i = c_d_lip_lst[i]
+ combined_lip_ratio_tensor = self.live_portrait_wrapper.calc_combined_lip_ratio(c_d_lip_i, source_lmk)
+ # ∆_lip,i = R_lip(x_s; c_s,lip, c_d,lip,i)
+ lip_delta = self.live_portrait_wrapper.retarget_lip(x_s, combined_lip_ratio_tensor)
+
+ if inf_cfg.flag_relative_motion: # use x_s
+ x_d_i_new = x_s + \
+ (eyes_delta.reshape(-1, x_s.shape[1], 3) if eyes_delta is not None else 0) + \
+ (lip_delta.reshape(-1, x_s.shape[1], 3) if lip_delta is not None else 0)
+ else: # use x_d,i
+ x_d_i_new = x_d_i_new + \
+ (eyes_delta.reshape(-1, x_s.shape[1], 3) if eyes_delta is not None else 0) + \
+ (lip_delta.reshape(-1, x_s.shape[1], 3) if lip_delta is not None else 0)
+
+ if inf_cfg.flag_stitching:
+ x_d_i_new = self.live_portrait_wrapper.stitching(x_s, x_d_i_new)
+
+ out = self.live_portrait_wrapper.warp_decode(f_s, x_s, x_d_i_new)
+ I_p_i = self.live_portrait_wrapper.parse_output(out['out'])[0]
+ I_p_lst.append(I_p_i)
+
+ if inf_cfg.flag_pasteback and inf_cfg.flag_do_crop and inf_cfg.flag_stitching:
+ # TODO: pasteback is slow, considering optimize it using multi-threading or GPU
+ I_p_pstbk = paste_back(I_p_i, crop_info['M_c2o'], img_rgb, mask_ori_float)
+ I_p_pstbk_lst.append(I_p_pstbk)
+
+ mkdir(args.output_dir)
+ wfp_concat = None
+ flag_has_audio = (not flag_load_from_template) and has_audio_stream(args.driving_info)
+
+ ######### build final concat result #########
+ # driving frame | source image | generation, or source image | generation
+ frames_concatenated = concat_frames(driving_rgb_crop_256x256_lst, img_crop_256x256, I_p_lst)
+ wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
+ images2video(frames_concatenated, wfp=wfp_concat, fps=output_fps)
+
+ if flag_has_audio:
+ # final concat result, with the driving video's audio muxed in
+ wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat_with_audio.mp4')
+ add_audio_to_video(wfp_concat, args.driving_info, wfp_concat_with_audio)
+ os.replace(wfp_concat_with_audio, wfp_concat)
+ log(f"Replace {wfp_concat} with {wfp_concat_with_audio}")
+
+ # save driven result
+ wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
+ if I_p_pstbk_lst is not None and len(I_p_pstbk_lst) > 0:
+ images2video(I_p_pstbk_lst, wfp=wfp, fps=output_fps)
+ else:
+ images2video(I_p_lst, wfp=wfp, fps=output_fps)
+
+ ######### build final result #########
+ if flag_has_audio:
+ wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_with_audio.mp4')
+ add_audio_to_video(wfp, args.driving_info, wfp_with_audio)
+ os.replace(wfp_with_audio, wfp)
+ log(f"Replace {wfp} with {wfp_with_audio}")
+
+ # final log
+ if wfp_template not in (None, ''):
+ log(f'Animated template: {wfp_template}, you can specify `-d` argument with this template path next time to avoid cropping video, motion making and protecting privacy.', style='bold green')
+ log(f'Animated video: {wfp}')
+ log(f'Animated video with concact: {wfp_concat}')
+
+ return wfp, wfp_concat
+
+ def make_motion_template(self, I_d_lst, c_d_eyes_lst, c_d_lip_lst, **kwargs): # builds the per-frame motion dict that execute() consumes and dumps to .pkl
+ n_frames = I_d_lst.shape[0]
+ template_dct = {
+ 'n_frames': n_frames,
+ 'output_fps': kwargs.get('output_fps', 25),
+ 'motion': [],
+ 'c_d_eyes_lst': [],
+ 'c_d_lip_lst': [],
+ }
+
+ for i in track(range(n_frames), description='Making motion templates...', total=n_frames):
+ # collect s_d, R_d, δ_d and t_d for inference
+ I_d_i = I_d_lst[i]
+ x_d_i_info = self.live_portrait_wrapper.get_kp_info(I_d_i)
+ R_d_i = get_rotation_matrix(x_d_i_info['pitch'], x_d_i_info['yaw'], x_d_i_info['roll'])
+
+ item_dct = {
+ 'scale': x_d_i_info['scale'].cpu().numpy().astype(np.float32),
+ 'R_d': R_d_i.cpu().numpy().astype(np.float32),
+ 'exp': x_d_i_info['exp'].cpu().numpy().astype(np.float32),
+ 't': x_d_i_info['t'].cpu().numpy().astype(np.float32),
+ }
+
+ template_dct['motion'].append(item_dct)
+
+ c_d_eyes = c_d_eyes_lst[i].astype(np.float32)
+ template_dct['c_d_eyes_lst'].append(c_d_eyes)
+
+ c_d_lip = c_d_lip_lst[i].astype(np.float32)
+ template_dct['c_d_lip_lst'].append(c_d_lip)
+
+ return template_dct
diff --git a/subprocess/LivePortrait/src/live_portrait_wrapper.py b/subprocess/LivePortrait/src/live_portrait_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..8869b952d92041d1da6a546be3b14baa02aa57a5
--- /dev/null
+++ b/subprocess/LivePortrait/src/live_portrait_wrapper.py
@@ -0,0 +1,311 @@
+# coding: utf-8
+
+"""
+Wrapper for LivePortrait core functions
+"""
+
+import os.path as osp
+import numpy as np
+import cv2
+import torch
+import yaml
+
+from .utils.timer import Timer
+from .utils.helper import load_model, concat_feat
+from .utils.camera import headpose_pred_to_degree, get_rotation_matrix
+from .utils.retargeting_utils import calc_eye_close_ratio, calc_lip_close_ratio
+from .config.inference_config import InferenceConfig
+from .utils.rprint import rlog as log
+
+
+ class LivePortraitWrapper(object):
+     """Inference-side wrapper around the LivePortrait sub-networks.
+
+     Holds the appearance feature extractor (F), motion extractor (M), warping
+     module (W), SPADE generator (G) and the optional stitching/retargeting
+     module (S and R), plus helpers that prepare inputs and post-process outputs.
+     """
+
+     def __init__(self, inference_cfg: InferenceConfig):
+         """Load all sub-networks described by ``inference_cfg``.
+
+         The device is 'cpu' when ``flag_force_cpu`` is set, otherwise
+         'cuda:<device_id>'. The stitching/retargeting module is only loaded when
+         ``checkpoint_S`` is set and exists on disk; otherwise it is ``None``.
+         """
+         self.inference_cfg = inference_cfg
+         self.device_id = inference_cfg.device_id
+         if inference_cfg.flag_force_cpu:
+             self.device = 'cpu'
+         else:
+             self.device = 'cuda:' + str(self.device_id)
+
+         # Use a context manager so the config file handle is closed deterministically.
+         with open(inference_cfg.models_config, 'r') as cfg_file:
+             model_config = yaml.load(cfg_file, Loader=yaml.SafeLoader)
+         # init F
+         self.appearance_feature_extractor = load_model(inference_cfg.checkpoint_F, model_config, self.device, 'appearance_feature_extractor')
+         log('Load appearance_feature_extractor done.')
+         # init M
+         self.motion_extractor = load_model(inference_cfg.checkpoint_M, model_config, self.device, 'motion_extractor')
+         log('Load motion_extractor done.')
+         # init W
+         self.warping_module = load_model(inference_cfg.checkpoint_W, model_config, self.device, 'warping_module')
+         log('Load warping_module done.')
+         # init G
+         self.spade_generator = load_model(inference_cfg.checkpoint_G, model_config, self.device, 'spade_generator')
+         log('Load spade_generator done.')
+         # init S and R
+         if inference_cfg.checkpoint_S is not None and osp.exists(inference_cfg.checkpoint_S):
+             self.stitching_retargeting_module = load_model(inference_cfg.checkpoint_S, model_config, self.device, 'stitching_retargeting_module')
+             log('Load stitching_retargeting_module done.')
+         else:
+             self.stitching_retargeting_module = None
+
+         self.timer = Timer()
+
+     def _autocast(self):
+         """Return the autocast context shared by the network forward passes."""
+         # self.device is 'cpu' or 'cuda:<id>', so its first four characters are the device type.
+         return torch.autocast(
+             device_type=self.device[:4],
+             dtype=torch.float16,
+             enabled=self.inference_cfg.flag_use_half_precision,
+         )
+
+     def update_config(self, user_args):
+         """Overwrite existing attributes of the inference config from ``user_args``.
+
+         Keys that are not already attributes of the config are ignored.
+         """
+         for k, v in user_args.items():
+             if hasattr(self.inference_cfg, k):
+                 setattr(self.inference_cfg, k, v)
+
+     def prepare_source(self, img: np.ndarray) -> torch.Tensor:
+         """ construct the input as standard
+         img: HxWx3 or BxHxWx3, uint8
+         return: Bx3xHxW float tensor in 0~1 on self.device (B == 1 for a single image)
+         raises: ValueError if img is neither 3- nor 4-dimensional
+         """
+         if img.ndim not in (3, 4):
+             raise ValueError(f'img ndim should be 3 or 4: {img.ndim}')
+
+         target_h, target_w = self.inference_cfg.input_shape[0], self.inference_cfg.input_shape[1]
+         # Take the spatial dims from the end so batched (BxHxWx3) input is measured correctly.
+         h, w = img.shape[-3:-1]
+         if h != target_h or w != target_w:
+             dsize = (target_w, target_h)  # cv2.resize takes dsize as (width, height)
+             if img.ndim == 3:
+                 x = cv2.resize(img, dsize)
+             else:
+                 # cv2.resize works on one image at a time
+                 x = np.stack([cv2.resize(frame, dsize) for frame in img])
+         else:
+             x = img.copy()
+
+         if x.ndim == 3:
+             x = x[np.newaxis]  # HxWx3 -> 1xHxWx3
+         x = x.astype(np.float32) / 255.  # normalized to 0~1
+         x = np.clip(x, 0, 1)  # clip to 0~1
+         x = torch.from_numpy(x).permute(0, 3, 1, 2)  # BxHxWx3 -> Bx3xHxW
+         x = x.to(self.device)
+         return x
+
+     def prepare_driving_videos(self, imgs) -> torch.Tensor:
+         """ construct the input as standard
+         imgs: list of T frames (each HxWx3, uint8), or an ndarray already shaped TxHxWx3x1
+         return: Tx1x3xHxW float tensor in 0~1 on self.device
+         raises: ValueError if imgs is neither a list nor an ndarray
+         """
+         if isinstance(imgs, list):
+             _imgs = np.array(imgs)[..., np.newaxis]  # TxHxWx3x1
+         elif isinstance(imgs, np.ndarray):
+             _imgs = imgs
+         else:
+             raise ValueError(f'imgs type error: {type(imgs)}')
+
+         y = _imgs.astype(np.float32) / 255.
+         y = np.clip(y, 0, 1)  # clip to 0~1
+         y = torch.from_numpy(y).permute(0, 4, 3, 1, 2)  # TxHxWx3x1 -> Tx1x3xHxW
+         y = y.to(self.device)
+
+         return y
+
+     def extract_feature_3d(self, x: torch.Tensor) -> torch.Tensor:
+         """ get the appearance feature of the image by F
+         x: Bx3xHxW, normalized to 0~1
+         return: the feature volume, cast to float32
+         """
+         with torch.no_grad():
+             with self._autocast():
+                 feature_3d = self.appearance_feature_extractor(x)
+
+         return feature_3d.float()
+
+     def get_kp_info(self, x: torch.Tensor, **kwargs) -> dict:
+         """ get the implicit keypoint information
+         x: Bx3xHxW, normalized to 0~1
+         flag_refine_info: whether to transform the pose to degrees and reshape 'kp'/'exp' to BxNx3 (default True)
+         return: A dict contains keys: 'pitch', 'yaw', 'roll', 't', 'exp', 'scale', 'kp'
+         """
+         with torch.no_grad():
+             with self._autocast():
+                 kp_info = self.motion_extractor(x)
+
+             if self.inference_cfg.flag_use_half_precision:
+                 # float the dict (only existing keys are reassigned, so iterating is safe)
+                 for k, v in kp_info.items():
+                     if isinstance(v, torch.Tensor):
+                         kp_info[k] = v.float()
+
+         flag_refine_info: bool = kwargs.get('flag_refine_info', True)
+         if flag_refine_info:
+             bs = kp_info['kp'].shape[0]
+             kp_info['pitch'] = headpose_pred_to_degree(kp_info['pitch'])[:, None]  # Bx1
+             kp_info['yaw'] = headpose_pred_to_degree(kp_info['yaw'])[:, None]  # Bx1
+             kp_info['roll'] = headpose_pred_to_degree(kp_info['roll'])[:, None]  # Bx1
+             kp_info['kp'] = kp_info['kp'].reshape(bs, -1, 3)  # BxNx3
+             kp_info['exp'] = kp_info['exp'].reshape(bs, -1, 3)  # BxNx3
+
+         return kp_info
+
+     def get_pose_dct(self, kp_info: dict) -> dict:
+         """Return pitch/yaw/roll as Python scalars (each pose entry must hold a single value)."""
+         pose_dct = dict(
+             pitch=headpose_pred_to_degree(kp_info['pitch']).item(),
+             yaw=headpose_pred_to_degree(kp_info['yaw']).item(),
+             roll=headpose_pred_to_degree(kp_info['roll']).item(),
+         )
+         return pose_dct
+
+     def get_fs_and_kp_info(self, source_prepared, driving_first_frame):
+         """Compute keypoint info and rotation for the source and the first driving frame, plus the source feature volume.
+
+         return: (source_kp_info, source_rotation, source_feature_3d,
+                  driving_first_frame_kp_info, driving_first_frame_rotation)
+         """
+         # get the canonical keypoints of source image by M
+         source_kp_info = self.get_kp_info(source_prepared, flag_refine_info=True)
+         source_rotation = get_rotation_matrix(source_kp_info['pitch'], source_kp_info['yaw'], source_kp_info['roll'])
+
+         # get the canonical keypoints of first driving frame by M
+         driving_first_frame_kp_info = self.get_kp_info(driving_first_frame, flag_refine_info=True)
+         driving_first_frame_rotation = get_rotation_matrix(
+             driving_first_frame_kp_info['pitch'],
+             driving_first_frame_kp_info['yaw'],
+             driving_first_frame_kp_info['roll']
+         )
+
+         # get feature volume by F
+         source_feature_3d = self.extract_feature_3d(source_prepared)
+
+         return source_kp_info, source_rotation, source_feature_3d, driving_first_frame_kp_info, driving_first_frame_rotation
+
+     def transform_keypoint(self, kp_info: dict):
+         """
+         transform the implicit keypoints with the pose, shift, and expression deformation
+         kp_info: dict with 'kp' (BxNx3 or Bx(N*3)), 'pitch', 'yaw', 'roll', 't', 'exp', 'scale'
+         return: BxNx3 transformed keypoints
+         """
+         kp = kp_info['kp']  # (bs, k, 3)
+         pitch, yaw, roll = kp_info['pitch'], kp_info['yaw'], kp_info['roll']
+
+         t, exp = kp_info['t'], kp_info['exp']
+         scale = kp_info['scale']
+
+         # NOTE(review): get_kp_info already converts the pose when flag_refine_info=True;
+         # presumably headpose_pred_to_degree leaves converted values untouched - confirm.
+         pitch = headpose_pred_to_degree(pitch)
+         yaw = headpose_pred_to_degree(yaw)
+         roll = headpose_pred_to_degree(roll)
+
+         bs = kp.shape[0]
+         if kp.ndim == 2:
+             num_kp = kp.shape[1] // 3  # Bx(num_kpx3)
+         else:
+             num_kp = kp.shape[1]  # Bxnum_kpx3
+
+         rot_mat = get_rotation_matrix(pitch, yaw, roll)  # (bs, 3, 3)
+
+         # Eqn.2: s * (R * x_c,s + exp) + t
+         kp_transformed = kp.view(bs, num_kp, 3) @ rot_mat + exp.view(bs, num_kp, 3)
+         kp_transformed *= scale[..., None]  # (bs, k, 3) * (bs, 1, 1) = (bs, k, 3)
+         kp_transformed[:, :, 0:2] += t[:, None, 0:2]  # remove z, only apply tx ty
+
+         return kp_transformed
+
+     def retarget_eye(self, kp_source: torch.Tensor, eye_close_ratio: torch.Tensor) -> torch.Tensor:
+         """
+         kp_source: BxNx3
+         eye_close_ratio: Bx3
+         return: delta predicted by the 'eye' retargeting network
+         Requires the stitching/retargeting module to have been loaded.
+         """
+         feat_eye = concat_feat(kp_source, eye_close_ratio)
+
+         with torch.no_grad():
+             delta = self.stitching_retargeting_module['eye'](feat_eye)
+
+         return delta
+
+     def retarget_lip(self, kp_source: torch.Tensor, lip_close_ratio: torch.Tensor) -> torch.Tensor:
+         """
+         kp_source: BxNx3
+         lip_close_ratio: Bx2
+         return: delta predicted by the 'lip' retargeting network
+         Requires the stitching/retargeting module to have been loaded.
+         """
+         feat_lip = concat_feat(kp_source, lip_close_ratio)
+
+         with torch.no_grad():
+             delta = self.stitching_retargeting_module['lip'](feat_lip)
+
+         return delta
+
+     def stitch(self, kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
+         """
+         kp_source: BxNx3
+         kp_driving: BxNx3
+         Return: Bx(3*num_kp+2)
+         Requires the stitching/retargeting module to have been loaded.
+         """
+         feat_stiching = concat_feat(kp_source, kp_driving)
+
+         with torch.no_grad():
+             delta = self.stitching_retargeting_module['stitching'](feat_stiching)
+
+         return delta
+
+     def stitching(self, kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
+         """ conduct the stitching
+         kp_source: Bxnum_kpx3
+         kp_driving: Bxnum_kpx3
+         return: a stitched copy of kp_driving, or kp_driving itself when no stitching module is loaded
+         """
+
+         if self.stitching_retargeting_module is not None:
+
+             bs, num_kp = kp_source.shape[:2]
+
+             kp_driving_new = kp_driving.clone()
+             delta = self.stitch(kp_source, kp_driving_new)
+
+             # the first 3*num_kp values are a per-keypoint offset, the next two a shared (tx, ty)
+             delta_exp = delta[..., :3*num_kp].reshape(bs, num_kp, 3)  # 1x20x3
+             delta_tx_ty = delta[..., 3*num_kp:3*num_kp+2].reshape(bs, 1, 2)  # 1x1x2
+
+             kp_driving_new += delta_exp
+             kp_driving_new[..., :2] += delta_tx_ty
+
+             return kp_driving_new
+
+         return kp_driving
+
+     def warp_decode(self, feature_3d: torch.Tensor, kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
+         """ get the image after the warping of the implicit keypoints
+         feature_3d: Bx32x16x64x64, feature volume
+         kp_source: BxNx3
+         kp_driving: BxNx3
+         return: the warping module's output dict, with 'out' replaced by the decoded image
+         """
+         # The line 18 in Algorithm 1: D(W(f_s; x_s, x′_d,i))
+         with torch.no_grad():
+             with self._autocast():
+                 # get decoder input
+                 ret_dct = self.warping_module(feature_3d, kp_source=kp_source, kp_driving=kp_driving)
+                 # decode
+                 ret_dct['out'] = self.spade_generator(feature=ret_dct['out'])
+
+             # float the dict
+             if self.inference_cfg.flag_use_half_precision:
+                 for k, v in ret_dct.items():
+                     if isinstance(v, torch.Tensor):
+                         ret_dct[k] = v.float()
+
+         return ret_dct
+
+     def parse_output(self, out: torch.Tensor) -> np.ndarray:
+         """ construct the output as standard
+         out: Bx3xHxW tensor, values outside 0~1 are clipped
+         return: BxHxWx3, uint8
+         """
+         out = np.transpose(out.data.cpu().numpy(), [0, 2, 3, 1])  # 1x3xHxW -> 1xHxWx3
+         out = np.clip(out, 0, 1)  # clip to 0~1
+         out = np.clip(out * 255, 0, 255).astype(np.uint8)  # 0~1 -> 0~255
+
+         return out
+
+     def calc_driving_ratio(self, driving_lmk_lst):
+         """Return (eye_ratio_list, lip_ratio_list), one entry per landmark set in ``driving_lmk_lst``."""
+         input_eye_ratio_lst = []
+         input_lip_ratio_lst = []
+         for lmk in driving_lmk_lst:
+             # for eyes retargeting
+             input_eye_ratio_lst.append(calc_eye_close_ratio(lmk[None]))
+             # for lip retargeting
+             input_lip_ratio_lst.append(calc_lip_close_ratio(lmk[None]))
+         return input_eye_ratio_lst, input_lip_ratio_lst
+
+     def calc_combined_eye_ratio(self, c_d_eyes_i, source_lmk):
+         """Concatenate the source eye ratio with the first driving eye ratio along dim 1."""
+         c_s_eyes = calc_eye_close_ratio(source_lmk[None])
+         c_s_eyes_tensor = torch.from_numpy(c_s_eyes).float().to(self.device)
+         c_d_eyes_i_tensor = torch.Tensor([c_d_eyes_i[0][0]]).reshape(1, 1).to(self.device)
+         # [c_s,eyes, c_d,eyes,i]
+         combined_eye_ratio_tensor = torch.cat([c_s_eyes_tensor, c_d_eyes_i_tensor], dim=1)
+         return combined_eye_ratio_tensor
+
+     def calc_combined_lip_ratio(self, c_d_lip_i, source_lmk):
+         """Concatenate the source lip ratio with the driving lip ratio along dim 1."""
+         c_s_lip = calc_lip_close_ratio(source_lmk[None])
+         c_s_lip_tensor = torch.from_numpy(c_s_lip).float().to(self.device)
+         c_d_lip_i_tensor = torch.Tensor([c_d_lip_i[0]]).to(self.device).reshape(1, 1)  # 1x1
+         # [c_s,lip, c_d,lip,i]
+         combined_lip_ratio_tensor = torch.cat([c_s_lip_tensor, c_d_lip_i_tensor], dim=1)  # 1x2
+         return combined_lip_ratio_tensor
diff --git a/subprocess/LivePortrait/src/modules/__init__.py b/subprocess/LivePortrait/src/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b9de48b6778d3a65edc3166502f82fc6e9b4c2c
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd5f1e3a9fdfc2b0fa357e78b977515b20ca1efc
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..03d71758976c98dcd5b196ad3431ac965edad22f
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c62a9a2580a2d4587c972d8078e2165ca079fd6
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/appearance_feature_extractor.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab96ef4fde53e58880d8df0d11e8d458b5cae8d5
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..858f86322482623b06e20f69c4b88c8ae75eb6a4
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/convnextv2.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ebc20e1307f6a86564c2a630715bbb7204b0a698
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b99bfd897f44b08208810951c51087c5462de619
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/dense_motion.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b0d8d85820a8770be6f2ca4ad3e3bc2ac87d050
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..295edfb20175e947bd484c9ddf68e21ce6fe7f86
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/motion_extractor.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a7771089392e07648152d82ff55876e3c17498f
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e05b3789777a193eb79f1d43d8be3bd1e62c957c
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/spade_generator.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a447a381b2e64e284cb2b32b0e4ef95a8fcc8ac1
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1d203aafea85e559f6c129e1903c93fd1bf9d476
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/stitching_retargeting_network.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c5ef1e3df94df943ffde20545e0341130165d0fe
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99af4b944ecc146fe2aedcb10f12257cac713a9a
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/util.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-310.pyc b/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76500301f1babdb5471737bc8370e4425f6b3f0b
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-39.pyc b/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8152c62aa6a25998f9b438699401acdb81b00d7b
Binary files /dev/null and b/subprocess/LivePortrait/src/modules/__pycache__/warping_network.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/modules/appearance_feature_extractor.py b/subprocess/LivePortrait/src/modules/appearance_feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d89e4f18a2fbe58447f52ab4c5e3f2011a4ec80
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/appearance_feature_extractor.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+
+"""
+Appearance extractor(F) defined in paper, which maps the source image s to a 3D appearance feature volume.
+"""
+
+import torch
+from torch import nn
+from .util import SameBlock2d, DownBlock2d, ResBlock3d
+
+
+ class AppearanceFeatureExtractor(nn.Module):
+     """Appearance extractor (F): a 2D convolutional encoder whose output is reshaped
+     into a 3D feature volume and refined by a stack of 3D residual blocks.
+     """
+
+     def __init__(self, image_channel, block_expansion, num_down_blocks, max_features, reshape_channel, reshape_depth, num_resblocks):
+         """
+         image_channel: channels of the input image
+         block_expansion: channel width after the first block; doubled by every down block
+         num_down_blocks: number of down blocks
+         max_features: cap on the channel width, and output width of the 1x1 convolution
+         reshape_channel, reshape_depth: channel/depth split used when reshaping to 3D
+             (the product must equal max_features for the reshape in forward to succeed)
+         num_resblocks: number of 3D residual blocks
+         """
+         super(AppearanceFeatureExtractor, self).__init__()
+         self.image_channel = image_channel
+         self.block_expansion = block_expansion
+         self.num_down_blocks = num_down_blocks
+         self.max_features = max_features
+         self.reshape_channel = reshape_channel
+         self.reshape_depth = reshape_depth
+
+         self.first = SameBlock2d(image_channel, block_expansion, kernel_size=(3, 3), padding=(1, 1))
+
+         # widths[k] is the channel count after k down blocks (doubling, capped at max_features)
+         widths = [min(max_features, block_expansion * (2 ** k)) for k in range(num_down_blocks + 1)]
+         self.down_blocks = nn.ModuleList([
+             DownBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1))
+             for in_features, out_features in zip(widths[:-1], widths[1:])
+         ])
+
+         # widths[-1] is defined even for num_down_blocks == 0, where relying on the
+         # loop variable of the down-block loop would raise NameError.
+         self.second = nn.Conv2d(in_channels=widths[-1], out_channels=max_features, kernel_size=1, stride=1)
+
+         self.resblocks_3d = torch.nn.Sequential()
+         for i in range(num_resblocks):
+             self.resblocks_3d.add_module('3dr' + str(i), ResBlock3d(reshape_channel, kernel_size=3, padding=1))
+
+     def forward(self, source_image):
+         """Map a source image batch to its 3D appearance feature volume.
+
+         The shape comments below are the example sizes noted by the original authors.
+         """
+         out = self.first(source_image)  # Bx3x256x256 -> Bx64x256x256
+
+         for down_block in self.down_blocks:
+             out = down_block(out)
+         out = self.second(out)
+         bs, c, h, w = out.shape  # ->Bx512x64x64
+
+         # split the channel axis into (reshape_channel, reshape_depth) to obtain a volume
+         f_s = out.view(bs, self.reshape_channel, self.reshape_depth, h, w)  # ->Bx32x16x64x64
+         f_s = self.resblocks_3d(f_s)  # ->Bx32x16x64x64
+         return f_s
diff --git a/subprocess/LivePortrait/src/modules/convnextv2.py b/subprocess/LivePortrait/src/modules/convnextv2.py
new file mode 100644
index 0000000000000000000000000000000000000000..83ea12662b607854915df8c7abb160b588d330b1
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/convnextv2.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+
+"""
+ This module adapts ConvNeXtV2 for the extraction of implicit keypoints, poses, and expression deformation.
+"""
+
+import torch
+import torch.nn as nn
+# from timm.models.layers import trunc_normal_, DropPath
+from .util import LayerNorm, DropPath, trunc_normal_, GRN
+
+ # Public API of this module: only the factory function is exported.
+ __all__ = ['convnextv2_tiny']
+
+
+ class Block(nn.Module):
+     """ ConvNeXtV2 residual block.
+
+     Depthwise 7x7 convolution, then (in channels-last layout) LayerNorm, a
+     4x-expanding linear layer, GELU, GRN and a projecting linear layer; the
+     result is added back to the block input through an optional DropPath.
+
+     Args:
+         dim (int): Number of input channels.
+         drop_path (float): Stochastic depth rate. Default: 0.0
+     """
+
+     def __init__(self, dim, drop_path=0.):
+         super().__init__()
+         hidden_dim = 4 * dim
+         self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
+         self.norm = LayerNorm(dim, eps=1e-6)
+         self.pwconv1 = nn.Linear(dim, hidden_dim)  # pointwise/1x1 convs, implemented with linear layers
+         self.act = nn.GELU()
+         self.grn = GRN(hidden_dim)
+         self.pwconv2 = nn.Linear(hidden_dim, dim)
+         if drop_path > 0.:
+             self.drop_path = DropPath(drop_path)
+         else:
+             self.drop_path = nn.Identity()
+
+     def forward(self, x):
+         shortcut = x
+         # (N, C, H, W) -> (N, H, W, C) so the linear layers act on the channel axis
+         feat = self.dwconv(x).permute(0, 2, 3, 1)
+         for layer in (self.norm, self.pwconv1, self.act, self.grn, self.pwconv2):
+             feat = layer(feat)
+         feat = feat.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
+
+         return shortcut + self.drop_path(feat)
+
+
+ class ConvNeXtV2(nn.Module):
+     """ ConvNeXt V2 backbone with regression heads for keypoints, pose and expression.
+
+     Args:
+         in_chans (int): Number of input image channels. Default: 3
+         depths (sequence of int): Number of blocks at each of the 4 stages. Default: (3, 3, 9, 3)
+         dims (sequence of int): Feature dimension at each of the 4 stages. Default: (96, 192, 384, 768)
+         drop_path_rate (float): Stochastic depth rate. Default: 0.
+         **kwargs: ``num_bins`` (int, default 66) sets the size of each pose head;
+             ``num_kp`` (int, default 24) sets the number of implicit keypoints.
+             Other keys are ignored.
+     """
+
+     def __init__(
+         self,
+         in_chans=3,
+         depths=(3, 3, 9, 3),
+         dims=(96, 192, 384, 768),
+         drop_path_rate=0.,
+         **kwargs
+     ):
+         super().__init__()
+         # Immutable defaults plus a private copy: instances never share (or mutate)
+         # a default list, and callers' sequences are left untouched.
+         depths = list(depths)
+         dims = list(dims)
+         self.depths = depths
+         self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
+         stem = nn.Sequential(
+             nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
+             LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
+         )
+         self.downsample_layers.append(stem)
+         for i in range(3):
+             downsample_layer = nn.Sequential(
+                 LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
+                 nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
+             )
+             self.downsample_layers.append(downsample_layer)
+
+         self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple residual blocks
+         # stochastic depth rate grows linearly from 0 to drop_path_rate over all blocks
+         dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
+         cur = 0
+         for i in range(4):
+             stage = nn.Sequential(
+                 *[Block(dim=dims[i], drop_path=dp_rates[cur + j]) for j in range(depths[i])]
+             )
+             self.stages.append(stage)
+             cur += depths[i]
+
+         self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
+
+         # NOTE: the output semantic items
+         num_bins = kwargs.get('num_bins', 66)
+         num_kp = kwargs.get('num_kp', 24)  # the number of implicit keypoints
+         self.fc_kp = nn.Linear(dims[-1], 3 * num_kp)  # implicit keypoints
+
+         self.fc_scale = nn.Linear(dims[-1], 1)  # scale
+         self.fc_pitch = nn.Linear(dims[-1], num_bins)  # pitch bins
+         self.fc_yaw = nn.Linear(dims[-1], num_bins)  # yaw bins
+         self.fc_roll = nn.Linear(dims[-1], num_bins)  # roll bins
+         self.fc_t = nn.Linear(dims[-1], 3)  # translation
+         self.fc_exp = nn.Linear(dims[-1], 3 * num_kp)  # expression / delta
+
+     def _init_weights(self, m):
+         """Truncated-normal weight / zero bias init for conv and linear layers.
+
+         Not applied by ``__init__``; available for ``self.apply(self._init_weights)``.
+         """
+         if isinstance(m, (nn.Conv2d, nn.Linear)):
+             trunc_normal_(m.weight, std=.02)
+             nn.init.constant_(m.bias, 0)
+
+     def forward_features(self, x):
+         """Run the four downsample/stage pairs and return the normalized pooled feature (N, C)."""
+         for i in range(4):
+             x = self.downsample_layers[i](x)
+             x = self.stages[i](x)
+         return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)
+
+     def forward(self, x):
+         """Return a dict with keys 'pitch', 'yaw', 'roll', 't', 'exp', 'scale' and 'kp'."""
+         x = self.forward_features(x)
+
+         return {
+             # pose and expression deformation
+             'pitch': self.fc_pitch(x),
+             'yaw': self.fc_yaw(x),
+             'roll': self.fc_roll(x),
+             't': self.fc_t(x),
+             'exp': self.fc_exp(x),
+             'scale': self.fc_scale(x),
+
+             'kp': self.fc_kp(x),  # canonical keypoint
+         }
+
+
+ def convnextv2_tiny(**kwargs):
+     """Build the ConvNeXtV2 variant with depths 3-3-9-3 and dims 96-192-384-768.
+
+     Extra keyword arguments are forwarded to ``ConvNeXtV2``.
+     """
+     tiny_cfg = dict(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768])
+     return ConvNeXtV2(**tiny_cfg, **kwargs)
diff --git a/subprocess/LivePortrait/src/modules/dense_motion.py b/subprocess/LivePortrait/src/modules/dense_motion.py
new file mode 100644
index 0000000000000000000000000000000000000000..0eec0c46345f8854b125a51eaee730bd4ee77f7d
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/dense_motion.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+
+"""
+ The module that predicts a dense motion field from the sparse motion representation given by kp_source and kp_driving
+"""
+
+from torch import nn
+import torch.nn.functional as F
+import torch
+from .util import Hourglass, make_coordinate_grid, kp2gaussian
+
+
+ class DenseMotionNetwork(nn.Module):
+     """Predicts a dense deformation field (and optionally an occlusion map) from
+     a 3D feature volume and the source/driving keypoints.
+     """
+
+     def __init__(self, block_expansion, num_blocks, max_features, num_kp, feature_channel, reshape_depth, compress, estimate_occlusion_map=True):
+         super(DenseMotionNetwork, self).__init__()
+         # every one of the (num_kp + 1) motions contributes `compress` feature channels plus one heatmap channel
+         hourglass_in_features = (num_kp + 1) * (compress + 1)
+         self.hourglass = Hourglass(block_expansion=block_expansion, in_features=hourglass_in_features, max_features=max_features, num_blocks=num_blocks)  # ~60+G
+
+         self.mask = nn.Conv3d(self.hourglass.out_filters, num_kp + 1, kernel_size=7, padding=3)  # 65G! NOTE: computation cost is large
+         self.compress = nn.Conv3d(feature_channel, compress, kernel_size=1)  # 0.8G
+         self.norm = nn.BatchNorm3d(compress, affine=True)
+         self.num_kp = num_kp
+         self.flag_estimate_occlusion_map = estimate_occlusion_map
+
+         self.occlusion = (
+             nn.Conv2d(self.hourglass.out_filters*reshape_depth, 1, kernel_size=7, padding=3)
+             if self.flag_estimate_occlusion_map
+             else None
+         )
+
+     def create_sparse_motions(self, feature, kp_driving, kp_source):
+         """Return the candidate motions: identity (background) plus one shift per keypoint, (bs, 1+num_kp, d, h, w, 3)."""
+         bs, _, d, h, w = feature.shape  # (bs, 4, 16, 64, 64)
+         base_grid = make_coordinate_grid((d, h, w), ref=kp_source).view(1, 1, d, h, w, 3)  # (1, 1, d=16, h=64, w=64, 3)
+         kp_view = (bs, self.num_kp, 1, 1, 1, 3)
+
+         # NOTE: there lacks an one-order flow
+         driving_to_source = (base_grid - kp_driving.view(*kp_view)) + kp_source.view(*kp_view)  # (bs, num_kp, d, h, w, 3)
+
+         # adding background feature
+         background = base_grid.repeat(bs, 1, 1, 1, 1, 1)
+         return torch.cat([background, driving_to_source], dim=1)  # (bs, 1+num_kp, d, h, w, 3)
+
+     def create_deformed_feature(self, feature, sparse_motions):
+         """Warp the feature volume once per candidate motion; returns (bs, num_kp+1, c, d, h, w)."""
+         bs, _, d, h, w = feature.shape
+         n_motions = self.num_kp + 1
+         tiled = feature.unsqueeze(1).unsqueeze(1).repeat(1, n_motions, 1, 1, 1, 1, 1)  # (bs, num_kp+1, 1, c, d, h, w)
+         tiled = tiled.view(bs * n_motions, -1, d, h, w)  # (bs*(num_kp+1), c, d, h, w)
+         flat_grid = sparse_motions.view((bs * n_motions, d, h, w, -1))  # (bs*(num_kp+1), d, h, w, 3)
+         warped = F.grid_sample(tiled, flat_grid, align_corners=False)
+         return warped.view((bs, n_motions, -1, d, h, w))  # (bs, num_kp+1, c, d, h, w)
+
+     def create_heatmap_representations(self, feature, kp_driving, kp_source):
+         """Return driving-minus-source keypoint heatmaps with a zero background channel, (bs, 1+num_kp, 1, d, h, w)."""
+         spatial_size = feature.shape[3:]  # (d=16, h=64, w=64)
+         heatmap = (
+             kp2gaussian(kp_driving, spatial_size=spatial_size, kp_variance=0.01)
+             - kp2gaussian(kp_source, spatial_size=spatial_size, kp_variance=0.01)
+         )  # (bs, num_kp, d, h, w)
+
+         # adding background feature (same dtype and device as the heatmap)
+         background = heatmap.new_zeros((heatmap.shape[0], 1, spatial_size[0], spatial_size[1], spatial_size[2]))
+         heatmap = torch.cat([background, heatmap], dim=1)
+         return heatmap.unsqueeze(2)  # (bs, 1+num_kp, 1, d, h, w)
+
+     def forward(self, feature, kp_driving, kp_source):
+         """Return a dict with 'mask', 'deformation' and, when enabled, 'occlusion_map'."""
+         bs, _, d, h, w = feature.shape  # (bs, 32, 16, 64, 64)
+
+         feature = F.relu(self.norm(self.compress(feature)))  # (bs, 4, 16, 64, 64)
+
+         out_dict = dict()
+
+         # 1. deform 3d feature
+         sparse_motion = self.create_sparse_motions(feature, kp_driving, kp_source)  # (bs, 1+num_kp, d, h, w, 3)
+         deformed_feature = self.create_deformed_feature(feature, sparse_motion)  # (bs, 1+num_kp, c=4, d=16, h=64, w=64)
+
+         # 2. (bs, 1+num_kp, d, h, w)
+         heatmap = self.create_heatmap_representations(deformed_feature, kp_driving, kp_source)  # (bs, 1+num_kp, 1, d, h, w)
+
+         hourglass_in = torch.cat([heatmap, deformed_feature], dim=2)  # (bs, 1+num_kp, c=5, d=16, h=64, w=64)
+         hourglass_in = hourglass_in.view(bs, -1, d, h, w)  # (bs, (1+num_kp)*c=105, d=16, h=64, w=64)
+
+         prediction = self.hourglass(hourglass_in)
+
+         mask = F.softmax(self.mask(prediction), dim=1)  # (bs, 1+num_kp, d=16, h=64, w=64)
+         out_dict['mask'] = mask
+
+         # blend the candidate motions with the softmax mask
+         weights = mask.unsqueeze(2)  # (bs, num_kp+1, 1, d, h, w)
+         motions = sparse_motion.permute(0, 1, 5, 2, 3, 4)  # (bs, num_kp+1, 3, d, h, w)
+         deformation = (motions * weights).sum(dim=1)  # (bs, 3, d, h, w) mask take effect in this place
+         out_dict['deformation'] = deformation.permute(0, 2, 3, 4, 1)  # (bs, d, h, w, 3)
+
+         if self.flag_estimate_occlusion_map:
+             bs, _, d, h, w = prediction.shape
+             # fold the depth axis into channels so a 2D convolution can be applied
+             flattened = prediction.view(bs, -1, h, w)
+             out_dict['occlusion_map'] = torch.sigmoid(self.occlusion(flattened))  # Bx1x64x64
+
+         return out_dict
diff --git a/subprocess/LivePortrait/src/modules/motion_extractor.py b/subprocess/LivePortrait/src/modules/motion_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2982e53c52d9ec1e0bec0453cc05edb51a15d23
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/motion_extractor.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+
+"""
+Motion extractor(M), which directly predicts the canonical keypoints, head pose and expression deformation of the input image
+"""
+
+from torch import nn
+import torch
+
+from .convnextv2 import convnextv2_tiny
+from .util import filter_state_dict
+
+ # Registry mapping a backbone name to the factory function that builds it.
+ model_dict = {
+     'convnextv2_tiny': convnextv2_tiny,
+ }
+
+
+ class MotionExtractor(nn.Module):
+     """Motion extractor (M): wraps the backbone selected by the ``backbone`` keyword."""
+
+     def __init__(self, **kwargs):
+         """
+         kwargs: ``backbone`` selects an entry of ``model_dict`` (default
+             'convnextv2_tiny'); all keyword arguments are forwarded to the
+             backbone factory.
+         raises: ValueError if the requested backbone is not registered
+         """
+         super(MotionExtractor, self).__init__()
+
+         # default is convnextv2_tiny
+         backbone = kwargs.get('backbone', 'convnextv2_tiny')
+         try:
+             build_backbone = model_dict[backbone]
+         except KeyError:
+             # fail with the offending name instead of "'NoneType' object is not callable"
+             raise ValueError(f'Unknown backbone {backbone!r}; available: {sorted(model_dict)}') from None
+         self.detector = build_backbone(**kwargs)
+
+     def load_pretrained(self, init_path: str):
+         """Load backbone weights from the 'model' entry of the checkpoint at ``init_path``.
+
+         Does nothing when ``init_path`` is None or ''. Loading is non-strict and
+         the result of ``load_state_dict`` is printed.
+         """
+         if init_path not in (None, ''):
+             # NOTE(review): torch.load unpickles the file; only load checkpoints from trusted sources.
+             state_dict = torch.load(init_path, map_location=lambda storage, loc: storage)['model']
+             state_dict = filter_state_dict(state_dict, remove_name='head')
+             ret = self.detector.load_state_dict(state_dict, strict=False)
+             print(f'Load pretrained model from {init_path}, ret: {ret}')
+
+     def forward(self, x):
+         """Return the backbone's output for ``x``."""
+         out = self.detector(x)
+         return out
diff --git a/subprocess/LivePortrait/src/modules/spade_generator.py b/subprocess/LivePortrait/src/modules/spade_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..147a9aed0c7707fe6ae3d59ce1a30154ef75afcc
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/spade_generator.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+
+"""
+SPADE decoder (G) defined in the paper, which takes the warped feature as input and generates the animated image.
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from .util import SPADEResnetBlock
+
+
+class SPADEDecoder(nn.Module):
+    """SPADE decoder: conv stem, six SPADE residual blocks, two upsample+SPADE stages and an RGB head."""
+
+    def __init__(self, upscale=1, max_features=256, block_expansion=64, out_channels=64, num_down_blocks=2):
+        """Build the decoder layers.
+
+        Args:
+            upscale: ``None`` or a value <= 1 selects a plain 3-channel conv head;
+                anything larger selects a conv + PixelShuffle head.
+            max_features: upper bound on the input feature width.
+            block_expansion: base channel width, doubled ``num_down_blocks`` times.
+            out_channels: channel width fed into the image head.
+            num_down_blocks: number of channel doublings applied to ``block_expansion``.
+        """
+        # Initialise nn.Module before assigning any attribute on self.
+        super().__init__()
+        self.upscale = upscale
+
+        # Width after `num_down_blocks` doublings, capped at `max_features`. Computed in
+        # closed form (same value as the last step of a doubling loop) so that
+        # num_down_blocks=0 is valid too instead of leaving the variable unbound.
+        input_channels = min(max_features, block_expansion * (2 ** num_down_blocks))
+        norm_G = 'spadespectralinstance'
+        label_num_channels = input_channels  # 256
+        wide = 2 * input_channels
+
+        self.fc = nn.Conv2d(input_channels, wide, 3, padding=1)
+        self.G_middle_0 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.G_middle_1 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.G_middle_2 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.G_middle_3 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.G_middle_4 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.G_middle_5 = SPADEResnetBlock(wide, wide, norm_G, label_num_channels)
+        self.up_0 = SPADEResnetBlock(wide, input_channels, norm_G, label_num_channels)
+        self.up_1 = SPADEResnetBlock(input_channels, out_channels, norm_G, label_num_channels)
+        self.up = nn.Upsample(scale_factor=2)
+
+        if self.upscale is None or self.upscale <= 1:
+            self.conv_img = nn.Conv2d(out_channels, 3, 3, padding=1)
+        else:
+            # NOTE(review): this head always applies a fixed x2 PixelShuffle, whatever
+            # value `upscale` has beyond 1 -- confirm that is intended.
+            self.conv_img = nn.Sequential(
+                nn.Conv2d(out_channels, 3 * (2 * 2), kernel_size=3, padding=1),
+                nn.PixelShuffle(upscale_factor=2)
+            )
+
+    def forward(self, feature):
+        """Decode ``feature`` into an image with values in (0, 1).
+
+        ``feature`` is used both as the network input and as the conditioning
+        map handed to every SPADE block.
+        """
+        seg = feature  # Bx256x64x64
+        x = self.fc(feature)  # Bx512x64x64
+        x = self.G_middle_0(x, seg)
+        x = self.G_middle_1(x, seg)
+        x = self.G_middle_2(x, seg)
+        x = self.G_middle_3(x, seg)
+        x = self.G_middle_4(x, seg)
+        x = self.G_middle_5(x, seg)
+
+        x = self.up(x)  # Bx512x64x64 -> Bx512x128x128
+        x = self.up_0(x, seg)  # Bx512x128x128 -> Bx256x128x128
+        x = self.up(x)  # Bx256x128x128 -> Bx256x256x256
+        x = self.up_1(x, seg)  # Bx256x256x256 -> Bx64x256x256
+
+        x = self.conv_img(F.leaky_relu(x, 2e-1))  # Bx64x256x256 -> Bx3xHxW
+        x = torch.sigmoid(x)  # Bx3xHxW
+
+        return x
\ No newline at end of file
diff --git a/subprocess/LivePortrait/src/modules/stitching_retargeting_network.py b/subprocess/LivePortrait/src/modules/stitching_retargeting_network.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f50b7cf5a21cd71c70a7bbaaa4b6b68b4762ea3
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/stitching_retargeting_network.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+
+"""
+Stitching module(S) and two retargeting modules(R) defined in the paper.
+
+- The stitching module pastes the animated portrait back into the original image space without pixel misalignment, such as in
+the stitching region.
+
+- The eyes retargeting module is designed to address the issue of incomplete eye closure during cross-id reenactment, especially
+when a person with small eyes drives a person with larger eyes.
+
+- The lip retargeting module is designed similarly to the eye retargeting module, and can also normalize the input by ensuring that
+the lips are in a closed state, which facilitates better animation driving.
+"""
+from torch import nn
+
+
+class StitchingRetargetingNetwork(nn.Module):
+    """Plain MLP: Linear+ReLU for every hidden size, followed by a final Linear layer."""
+
+    def __init__(self, input_size, hidden_sizes, output_size):
+        """Build the MLP.
+
+        Args:
+            input_size: width of the input vector.
+            hidden_sizes: sequence of hidden layer widths; may be empty, in which
+                case the network is a single Linear(input_size, output_size).
+            output_size: width of the output vector.
+        """
+        super(StitchingRetargetingNetwork, self).__init__()
+        widths = [input_size, *hidden_sizes]
+        layers = []
+        # Pair consecutive widths; layer order (and so state_dict keys) is unchanged.
+        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
+            layers.append(nn.Linear(fan_in, fan_out))
+            layers.append(nn.ReLU(inplace=True))
+        layers.append(nn.Linear(widths[-1], output_size))
+        self.mlp = nn.Sequential(*layers)
+
+    def initialize_weights_to_zero(self):
+        """Set the weight and bias of every Linear layer to zero."""
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.zeros_(m.weight)
+                nn.init.zeros_(m.bias)
+
+    def forward(self, x):
+        """Apply the MLP to ``x``."""
+        return self.mlp(x)
diff --git a/subprocess/LivePortrait/src/modules/util.py b/subprocess/LivePortrait/src/modules/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..f83980b24372bee38779ceeb3349fca91735e56e
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/util.py
@@ -0,0 +1,441 @@
+# coding: utf-8
+
+"""
+This file defines various neural network modules and utility functions, including convolutional and residual blocks,
+normalizations, and functions for spatial transformation and tensor manipulation.
+"""
+
+from torch import nn
+import torch.nn.functional as F
+import torch
+import torch.nn.utils.spectral_norm as spectral_norm
+import math
+import warnings
+
+
+def kp2gaussian(kp, spatial_size, kp_variance):
+    """
+    Turn keypoints into Gaussian-like heatmaps over a coordinate grid.
+
+    For every keypoint the result holds exp(-0.5 * squared_distance / kp_variance)
+    between that keypoint and each grid position produced by ``make_coordinate_grid``.
+    The last dimension of ``kp`` is viewed as 3 coordinates; all dimensions
+    before it are treated as leading (batch-like) dimensions.
+    """
+    lead_ndim = len(kp.shape) - 1
+    lead_shape = kp.shape[:lead_ndim]
+
+    # Grid gets one singleton axis per leading dim, then is tiled across them.
+    grid = make_coordinate_grid(spatial_size, kp)
+    grid = grid.view(*((1,) * lead_ndim + grid.shape))
+    grid = grid.repeat(*(lead_shape + (1, 1, 1, 1)))
+
+    # Keypoints get three singleton spatial axes so they line up with the grid.
+    centers = kp.view(*(lead_shape + (1, 1, 1, 3)))
+
+    sq_dist = ((grid - centers) ** 2).sum(-1)
+    return torch.exp(-0.5 * sq_dist / kp_variance)
+
+
+def make_coordinate_grid(spatial_size, ref, **kwargs):
+    """
+    Build a (d, h, w, 3) grid of (x, y, z) coordinates, each axis spanning [-1, 1].
+
+    ``spatial_size`` is (d, h, w). The grid takes its dtype and device from
+    ``ref``. Extra keyword arguments are accepted and ignored.
+    """
+    d, h, w = spatial_size
+
+    def _axis(n):
+        # indices 0..n-1 in ref's dtype/device, mapped linearly onto [-1, 1]
+        idx = torch.arange(n).type(ref.dtype).to(ref.device)
+        return 2 * (idx / (n - 1)) - 1
+
+    # NOTE: must be right-down-in
+    xs = _axis(w).view(1, 1, -1).repeat(d, h, 1)  # the x axis faces to the right
+    ys = _axis(h).view(1, -1, 1).repeat(d, 1, w)  # the y axis faces to the bottom
+    zs = _axis(d).view(-1, 1, 1).repeat(1, h, w)  # the z axis faces to the inner
+
+    return torch.stack([xs, ys, zs], dim=3)
+
+
+class ConvT2d(nn.Module):
+    """
+    Transposed-convolution block: ConvTranspose2d -> InstanceNorm2d -> leaky ReLU.
+    """
+
+    def __init__(self, in_features, out_features, kernel_size=3, stride=2, padding=1, output_padding=1):
+        super(ConvT2d, self).__init__()
+
+        deconv_cfg = dict(kernel_size=kernel_size, stride=stride, padding=padding, output_padding=output_padding)
+        self.convT = nn.ConvTranspose2d(in_features, out_features, **deconv_cfg)
+        self.norm = nn.InstanceNorm2d(out_features)
+
+    def forward(self, x):
+        """Apply transposed conv, instance norm and leaky ReLU in sequence."""
+        return F.leaky_relu(self.norm(self.convT(x)))
+
+
+class ResBlock3d(nn.Module):
+    """
+    Pre-activation 3D residual block: two (BatchNorm3d -> ReLU -> Conv3d) stages plus a skip connection.
+    """
+
+    def __init__(self, in_features, kernel_size, padding):
+        super(ResBlock3d, self).__init__()
+        conv_cfg = dict(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, padding=padding)
+        self.conv1 = nn.Conv3d(**conv_cfg)
+        self.conv2 = nn.Conv3d(**conv_cfg)
+        self.norm1 = nn.BatchNorm3d(in_features, affine=True)
+        self.norm2 = nn.BatchNorm3d(in_features, affine=True)
+
+    def forward(self, x):
+        """Return ``x`` plus the output of the two norm/ReLU/conv stages."""
+        residual = self.conv1(F.relu(self.norm1(x)))
+        residual = self.conv2(F.relu(self.norm2(residual)))
+        residual += x
+        return residual
+
+
+class UpBlock3d(nn.Module):
+    """
+    3D upsampling block: x2 interpolation on the last two axes, then Conv3d -> BatchNorm3d -> ReLU.
+    """
+
+    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
+        super(UpBlock3d, self).__init__()
+
+        self.conv = nn.Conv3d(
+            in_channels=in_features,
+            out_channels=out_features,
+            kernel_size=kernel_size,
+            padding=padding,
+            groups=groups,
+        )
+        self.norm = nn.BatchNorm3d(out_features, affine=True)
+
+    def forward(self, x):
+        """Upsample height and width by 2 (depth unchanged), then conv, norm and ReLU."""
+        upsampled = F.interpolate(x, scale_factor=(1, 2, 2))
+        return F.relu(self.norm(self.conv(upsampled)))
+
+
+class DownBlock2d(nn.Module):
+    """
+    2D downsampling block: Conv2d -> BatchNorm2d -> ReLU -> 2x2 average pooling.
+    """
+
+    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
+        super(DownBlock2d, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels=in_features,
+            out_channels=out_features,
+            kernel_size=kernel_size,
+            padding=padding,
+            groups=groups,
+        )
+        self.norm = nn.BatchNorm2d(out_features, affine=True)
+        self.pool = nn.AvgPool2d(kernel_size=(2, 2))
+
+    def forward(self, x):
+        """Conv, norm and ReLU, then average-pool with a 2x2 window."""
+        activated = F.relu(self.norm(self.conv(x)))
+        return self.pool(activated)
+
+
+class DownBlock3d(nn.Module):
+    """
+    3D downsampling block: Conv3d -> BatchNorm3d -> ReLU -> (1, 2, 2) average pooling.
+    """
+
+    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
+        super(DownBlock3d, self).__init__()
+        # The conv keeps the default stride; the spatial reduction is done by the pooling layer.
+        self.conv = nn.Conv3d(
+            in_channels=in_features,
+            out_channels=out_features,
+            kernel_size=kernel_size,
+            padding=padding,
+            groups=groups,
+        )
+        self.norm = nn.BatchNorm3d(out_features, affine=True)
+        self.pool = nn.AvgPool3d(kernel_size=(1, 2, 2))
+
+    def forward(self, x):
+        """Conv, norm and ReLU, then average-pool over the last two axes only."""
+        activated = F.relu(self.norm(self.conv(x)))
+        return self.pool(activated)
+
+
+class SameBlock2d(nn.Module):
+    """
+    Conv2d -> BatchNorm2d -> activation; the activation is LeakyReLU when ``lrelu`` is set, else ReLU.
+    """
+
+    def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1, lrelu=False):
+        super(SameBlock2d, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels=in_features,
+            out_channels=out_features,
+            kernel_size=kernel_size,
+            padding=padding,
+            groups=groups,
+        )
+        self.norm = nn.BatchNorm2d(out_features, affine=True)
+        self.ac = nn.LeakyReLU() if lrelu else nn.ReLU()
+
+    def forward(self, x):
+        """Apply conv, batch norm and the configured activation."""
+        return self.ac(self.norm(self.conv(x)))
+
+
+class Encoder(nn.Module):
+    """
+    Hourglass encoder: a chain of DownBlock3d stages that returns every intermediate feature map.
+    """
+
+    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
+        super(Encoder, self).__init__()
+
+        def _width(level):
+            # channel width at a given depth, capped at max_features
+            return min(max_features, block_expansion * (2 ** level))
+
+        stages = [
+            DownBlock3d(in_features if level == 0 else _width(level), _width(level + 1), kernel_size=3, padding=1)
+            for level in range(num_blocks)
+        ]
+        self.down_blocks = nn.ModuleList(stages)
+
+    def forward(self, x):
+        """Return ``[x, stage_1(x), stage_2(...), ...]``, one entry per stage plus the input."""
+        feats = [x]
+        for stage in self.down_blocks:
+            feats.append(stage(feats[-1]))
+        return feats
+
+
+class Decoder(nn.Module):
+    """
+    Hourglass decoder: UpBlock3d stages with skip concatenation, then Conv3d -> BatchNorm3d -> ReLU.
+    """
+
+    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
+        """Build ``num_blocks`` up stages (deepest first) and the final conv/norm.
+
+        ``self.out_filters`` (``block_expansion + in_features``) is the channel
+        count of the tensor returned by ``forward``.
+        """
+        super(Decoder, self).__init__()
+
+        up_blocks = []
+        for i in reversed(range(num_blocks)):
+            # Every stage except the deepest receives the previous output
+            # concatenated with a skip feature, hence the doubled input width.
+            in_filters = (1 if i == num_blocks - 1 else 2) * min(max_features, block_expansion * (2 ** (i + 1)))
+            out_filters = min(max_features, block_expansion * (2 ** i))
+            up_blocks.append(UpBlock3d(in_filters, out_filters, kernel_size=3, padding=1))
+
+        self.up_blocks = nn.ModuleList(up_blocks)
+        self.out_filters = block_expansion + in_features
+
+        self.conv = nn.Conv3d(in_channels=self.out_filters, out_channels=self.out_filters, kernel_size=3, padding=1)
+        self.norm = nn.BatchNorm3d(self.out_filters, affine=True)
+
+    def forward(self, x):
+        """Decode a sequence of encoder features (shallowest first, deepest last).
+
+        The sequence is consumed from the end: the last entry is the starting
+        feature and each up stage is followed by concatenation (on dim 1) with
+        the next entry from the end. The caller's sequence is left unmodified.
+        """
+        # Work on a copy so the caller's list is not emptied (and tuples are accepted).
+        feats = list(x)
+        out = feats.pop()
+        for up_block in self.up_blocks:
+            out = up_block(out)
+            out = torch.cat([out, feats.pop()], dim=1)
+        out = self.conv(out)
+        out = self.norm(out)
+        out = F.relu(out)
+        return out
+
+
+class Hourglass(nn.Module):
+    """
+    Encoder followed by Decoder; exposes the decoder's output channel count as ``out_filters``.
+    """
+
+    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
+        super(Hourglass, self).__init__()
+        shared_args = (block_expansion, in_features, num_blocks, max_features)
+        self.encoder = Encoder(*shared_args)
+        self.decoder = Decoder(*shared_args)
+        self.out_filters = self.decoder.out_filters
+
+    def forward(self, x):
+        """Run the encoder and feed its output to the decoder."""
+        encoded = self.encoder(x)
+        return self.decoder(encoded)
+
+
+class SPADE(nn.Module):
+    """Instance-normalises the input, then modulates it with a scale and shift predicted from ``segmap``."""
+
+    def __init__(self, norm_nc, label_nc):
+        super().__init__()
+
+        self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False)
+        nhidden = 128
+        conv_cfg = dict(kernel_size=3, padding=1)
+
+        self.mlp_shared = nn.Sequential(
+            nn.Conv2d(label_nc, nhidden, **conv_cfg),
+            nn.ReLU())
+        self.mlp_gamma = nn.Conv2d(nhidden, norm_nc, **conv_cfg)
+        self.mlp_beta = nn.Conv2d(nhidden, norm_nc, **conv_cfg)
+
+    def forward(self, x, segmap):
+        """Return ``norm(x) * (1 + gamma) + beta`` with gamma/beta computed from ``segmap``."""
+        # Bring the conditioning map to x's spatial size before the convs.
+        resized = F.interpolate(segmap, size=x.size()[2:], mode='nearest')
+        hidden = self.mlp_shared(resized)
+        scale = 1 + self.mlp_gamma(hidden)
+        shift = self.mlp_beta(hidden)
+        return self.param_free_norm(x) * scale + shift
+
+
+class SPADEResnetBlock(nn.Module):
+    """Residual block whose normalisation layers are SPADE modules conditioned on ``seg1``."""
+
+    def __init__(self, fin, fout, norm_G, label_nc, use_se=False, dilation=1):
+        super().__init__()
+        # A learned 1x1 shortcut is used only when the channel count changes.
+        self.learned_shortcut = (fin != fout)
+        fmiddle = min(fin, fout)
+        self.use_se = use_se
+
+        # Convolutions (padding equals dilation for the 3x3 kernels).
+        conv_cfg = dict(kernel_size=3, padding=dilation, dilation=dilation)
+        self.conv_0 = nn.Conv2d(fin, fmiddle, **conv_cfg)
+        self.conv_1 = nn.Conv2d(fmiddle, fout, **conv_cfg)
+        if self.learned_shortcut:
+            self.conv_s = nn.Conv2d(fin, fout, kernel_size=1, bias=False)
+
+        # Wrap the convolutions in spectral norm when norm_G asks for it.
+        if 'spectral' in norm_G:
+            wrapped = ['conv_0', 'conv_1']
+            if self.learned_shortcut:
+                wrapped.append('conv_s')
+            for attr in wrapped:
+                setattr(self, attr, spectral_norm(getattr(self, attr)))
+
+        # SPADE normalisation layers, one per convolution input.
+        self.norm_0 = SPADE(fin, label_nc)
+        self.norm_1 = SPADE(fmiddle, label_nc)
+        if self.learned_shortcut:
+            self.norm_s = SPADE(fin, label_nc)
+
+    def forward(self, x, seg1):
+        """Return shortcut(x) plus the two-stage SPADE -> leaky ReLU -> conv branch."""
+        skip = self.shortcut(x, seg1)
+        branch = self.conv_0(self.actvn(self.norm_0(x, seg1)))
+        branch = self.conv_1(self.actvn(self.norm_1(branch, seg1)))
+        return skip + branch
+
+    def shortcut(self, x, seg1):
+        """Return ``x`` itself, or its SPADE-normalised 1x1 projection when channels differ."""
+        if not self.learned_shortcut:
+            return x
+        return self.conv_s(self.norm_s(x, seg1))
+
+    def actvn(self, x):
+        """Leaky ReLU with negative slope 0.2."""
+        return F.leaky_relu(x, 2e-1)
+
+
+def filter_state_dict(state_dict, remove_name='fc'):
+    """Return a new dict holding every entry whose key does not contain ``remove_name``."""
+    return {key: state_dict[key] for key in state_dict if remove_name not in key}
+
+
+class GRN(nn.Module):
+    """ GRN (Global Response Normalization) layer with zero-initialised gamma and beta
+    """
+
+    def __init__(self, dim):
+        super().__init__()
+        param_shape = (1, 1, 1, dim)
+        self.gamma = nn.Parameter(torch.zeros(*param_shape))
+        self.beta = nn.Parameter(torch.zeros(*param_shape))
+
+    def forward(self, x):
+        """Return ``gamma * (x * Nx) + beta + x`` where Nx is the normalised L2 response."""
+        # L2 norm over dims 1 and 2, then divided by its mean over the last dim.
+        response = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
+        relative = response / (response.mean(dim=-1, keepdim=True) + 1e-6)
+        return self.gamma * (x * relative) + self.beta + x
+
+
+class LayerNorm(nn.Module):
+    r""" LayerNorm for either of two layouts, selected by ``data_format``:
+    "channels_last" (default) for inputs shaped (batch_size, height, width, channels) and
+    "channels_first" for inputs shaped (batch_size, channels, height, width).
+    Any other ``data_format`` raises NotImplementedError at construction time.
+    """
+
+    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+        self.eps = eps
+        self.data_format = data_format
+        if self.data_format not in ("channels_last", "channels_first"):
+            raise NotImplementedError
+        self.normalized_shape = (normalized_shape, )
+
+    def forward(self, x):
+        """Normalise ``x`` over its channel dimension and apply the learned weight and bias."""
+        if self.data_format == "channels_first":
+            # Manual normalisation over dim 1, using the biased variance.
+            centered = x - x.mean(1, keepdim=True)
+            variance = centered.pow(2).mean(1, keepdim=True)
+            normed = centered / torch.sqrt(variance + self.eps)
+            return self.weight[:, None, None] * normed + self.bias[:, None, None]
+        if self.data_format == "channels_last":
+            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
+
+
+def _no_grad_trunc_normal_(tensor, mean, std, a, b):
+    """Fill ``tensor`` in place with normal(mean, std) samples truncated to [a, b] and return it.
+
+    Emits a warning when ``mean`` lies more than two standard deviations
+    outside [a, b]. All tensor operations run under ``torch.no_grad()``.
+    """
+    # Cut & paste from PyTorch official master until it's in a few official releases - RW
+    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    sqrt_two = math.sqrt(2.)
+
+    def _std_normal_cdf(value):
+        # cumulative distribution function of the standard normal
+        return (1. + math.erf(value / sqrt_two)) / 2.
+
+    if (mean < a - 2 * std) or (mean > b + 2 * std):
+        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
+                      "The distribution of values may be incorrect.",
+                      stacklevel=2)
+
+    with torch.no_grad():
+        # CDF values at the two truncation bounds.
+        cdf_low = _std_normal_cdf((a - mean) / std)
+        cdf_high = _std_normal_cdf((b - mean) / std)
+
+        # Sample uniformly in [2*cdf_low - 1, 2*cdf_high - 1] ...
+        tensor.uniform_(2 * cdf_low - 1, 2 * cdf_high - 1)
+        # ... map through the inverse error function to get a truncated standard normal ...
+        tensor.erfinv_()
+        # ... then scale and shift to the requested std and mean.
+        tensor.mul_(std * sqrt_two)
+        tensor.add_(mean)
+
+        # Clamp to guard against values landing just outside [a, b].
+        tensor.clamp_(min=a, max=b)
+        return tensor
+
+
+def drop_path(x, drop_prob=0., training=False, scale_by_keep=True):
+    """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    Returns ``x`` itself when ``drop_prob`` is 0 or ``training`` is false. Otherwise each
+    sample along dim 0 is multiplied by a Bernoulli(1 - drop_prob) draw, and when
+    ``scale_by_keep`` is set (and the keep probability is positive) the kept samples are
+    divided by the keep probability.
+
+    """
+    if drop_prob == 0. or not training:
+        return x
+
+    keep_prob = 1 - drop_prob
+    # One mask value per sample, broadcast over all remaining dims (works for any rank).
+    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+    keep_mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
+    if keep_prob > 0.0 and scale_by_keep:
+        keep_mask.div_(keep_prob)
+    return x * keep_mask
+
+
+class DropPath(nn.Module):
+    """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    Module wrapper around ``drop_path`` that uses the module's own ``training`` flag.
+    """
+
+    def __init__(self, drop_prob=None, scale_by_keep=True):
+        """Store the drop probability (``None`` means no dropping) and the rescale flag."""
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def forward(self, x):
+        """Return ``x`` unchanged when dropping is disabled, else apply ``drop_path``."""
+        # A drop_prob of None (the default) or 0 disables dropping. Short-circuit here so
+        # that None never reaches the `1 - drop_prob` arithmetic in training mode.
+        if not self.drop_prob or not self.training:
+            return x
+        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
+
+
+def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
+    """Fill ``tensor`` in place via ``_no_grad_trunc_normal_`` and return its result."""
+    return _no_grad_trunc_normal_(tensor, mean=mean, std=std, a=a, b=b)
diff --git a/subprocess/LivePortrait/src/modules/warping_network.py b/subprocess/LivePortrait/src/modules/warping_network.py
new file mode 100644
index 0000000000000000000000000000000000000000..9191a197055a954272ee8ed86c5e34f3f33f9ad5
--- /dev/null
+++ b/subprocess/LivePortrait/src/modules/warping_network.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+
+"""
+Warping field estimator(W) defined in the paper, which generates a warping field using the implicit
+keypoint representations x_s and x_d, and employs this flow field to warp the source feature volume f_s.
+"""
+
+from torch import nn
+import torch.nn.functional as F
+from .util import SameBlock2d
+from .dense_motion import DenseMotionNetwork
+
+
+class WarpingNetwork(nn.Module):
+    """Warps a 3D feature volume with a predicted deformation field and projects it to 2D features."""
+
+    def __init__(
+        self,
+        num_kp,
+        block_expansion,
+        max_features,
+        num_down_blocks,
+        reshape_channel,
+        estimate_occlusion_map=False,
+        dense_motion_params=None,
+        **kwargs
+    ):
+        """Build the dense motion network (when configured) and the two projection layers.
+
+        Args:
+            num_kp: forwarded to ``DenseMotionNetwork`` as ``num_kp``.
+            block_expansion, num_down_blocks: together give the output width
+                ``block_expansion * 2 ** num_down_blocks`` of the projection layers.
+            max_features: input channel count of the first projection layer.
+            reshape_channel: forwarded to ``DenseMotionNetwork`` as ``feature_channel``.
+            estimate_occlusion_map: forwarded to ``DenseMotionNetwork`` and stored on self.
+            dense_motion_params: extra keyword arguments for ``DenseMotionNetwork``;
+                when ``None`` no dense motion network is created.
+            **kwargs: ``upscale`` (default 1) and ``flag_use_occlusion_map``
+                (default True) are read from here.
+        """
+        super(WarpingNetwork, self).__init__()
+
+        self.upscale = kwargs.get('upscale', 1)
+        self.flag_use_occlusion_map = kwargs.get('flag_use_occlusion_map', True)
+
+        if dense_motion_params is not None:
+            self.dense_motion_network = DenseMotionNetwork(
+                num_kp=num_kp,
+                feature_channel=reshape_channel,
+                estimate_occlusion_map=estimate_occlusion_map,
+                **dense_motion_params
+            )
+        else:
+            self.dense_motion_network = None
+
+        out_width = block_expansion * (2 ** num_down_blocks)
+        self.third = SameBlock2d(max_features, out_width, kernel_size=(3, 3), padding=(1, 1), lrelu=True)
+        self.fourth = nn.Conv2d(in_channels=out_width, out_channels=out_width, kernel_size=1, stride=1)
+
+        self.estimate_occlusion_map = estimate_occlusion_map
+
+    def deform_input(self, inp, deformation):
+        """Sample ``inp`` at the locations given by ``deformation`` using ``F.grid_sample``."""
+        return F.grid_sample(inp, deformation, align_corners=False)
+
+    def forward(self, feature_3d, kp_driving, kp_source):
+        """Warp ``feature_3d`` from the source to the driving keypoints.
+
+        Returns:
+            dict with keys ``'occlusion_map'`` (``None`` when the dense motion
+            network does not return one), ``'deformation'`` and ``'out'``.
+
+        Raises:
+            RuntimeError: if the module was built without ``dense_motion_params``,
+                since no deformation field can be predicted in that case.
+        """
+        if self.dense_motion_network is None:
+            # Without this guard the result dict below would reference unbound locals.
+            raise RuntimeError(
+                'WarpingNetwork was built without dense_motion_params; '
+                'forward() needs a dense motion network to predict the deformation'
+            )
+
+        # Feature warper, Transforming feature representation according to deformation and occlusion
+        dense_motion = self.dense_motion_network(
+            feature=feature_3d, kp_driving=kp_driving, kp_source=kp_source
+        )
+        occlusion_map = dense_motion.get('occlusion_map')  # Bx1x64x64, or None when absent
+
+        deformation = dense_motion['deformation']  # Bx16x64x64x3
+        out = self.deform_input(feature_3d, deformation)  # Bx32x16x64x64
+
+        # Fold the depth axis into the channel axis before the 2D projection layers.
+        bs, c, d, h, w = out.shape  # Bx32x16x64x64
+        out = out.view(bs, c * d, h, w)  # -> Bx512x64x64
+        out = self.third(out)  # -> Bx256x64x64
+        out = self.fourth(out)  # -> Bx256x64x64
+
+        if self.flag_use_occlusion_map and (occlusion_map is not None):
+            out = out * occlusion_map
+
+        ret_dct = {
+            'occlusion_map': occlusion_map,
+            'deformation': deformation,
+            'out': out,
+        }
+
+        return ret_dct
diff --git a/subprocess/LivePortrait/src/utils/__init__.py b/subprocess/LivePortrait/src/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ee1b01adc1301977b13390a8eb96e974a4db2fc6
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ef2e63a298f508062a9cb4f024075962e1ef335
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7d49cf3d36d2e08cc6c7465b4c519734390d70e
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ecda11528c7ab793603e4cf657d4d3cac2cea8c1
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/camera.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..01fa177b0f9b398a2474ddb1571210bed56ce68c
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce2d75f950bdb1db08e91a65a0e005375b6f74be
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/crop.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b853cd4e4ef41571b5d8250cfc4b8698a6da6a32
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3a0ffbc0f013fa466c59729f9becc3446b58b5a
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/cropper.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76812d9f695c0487641d6904c0b817d50b169ad4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b1f1dd2538d73b49a49cf400617f48366e607e6
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/face_analysis_diy.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3c31fe754bb833000e2dfa6434e396f0fcfa2736
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..461d2acb1e1f3bb0635655fd3e15a9cabe52c04b
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/helper.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d84ef682801ad4aeeff54a49eb08c4bd800a5b0d
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db0e5ddbb944eb44b92b4fc5022aba16e6eddef4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/io.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62165531a6125bbc11da885cde30b874b2274ffd
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4731971977b04a80821b7f4cd5b36d7e993fbec4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/landmark_runner.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..897354aea89c2a7ec5c140d4651f8ca416d27985
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..135a2c881e60f185c121c64cf374f5c8883d526d
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/retargeting_utils.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..edc64d81b11fe94116ab90cc42de6a9ff307ea10
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f93e79edf8ed20c2a783c3edb9eeec846e51a175
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/rprint.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..47ffdeb755cc0f5bd8b1f7fb4067f720caad936a
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae03ab9fec2329866fa89f3f7f47ed6048cf7c8c
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/timer.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-310.pyc b/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..998449fe0a48928a7bf1176e00767476b56f8207
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-39.pyc b/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cc2bafdb9a766783fddd7302ee19e28f3af2b239
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/__pycache__/video.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/camera.py b/subprocess/LivePortrait/src/utils/camera.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3dd942697e1f00a96dc3efc75b883d98b52e525
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/camera.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+
+"""
+functions for processing and transforming 3D facial keypoints
+"""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+PI = np.pi
+
+
+def headpose_pred_to_degree(pred):
+    """
+    Convert 66-bin head-pose logits to degrees; any other input is returned unchanged.
+
+    pred: (bs, 66) or (bs, 1) or others
+    For a (bs, 66) input the result is the softmax-weighted mean bin index times 3, minus 97.5.
+    """
+    if not (pred.ndim > 1 and pred.shape[1] == 66):
+        return pred
+
+    # NOTE: note that the average is modified to 97.5
+    bin_index = torch.arange(66, dtype=torch.float32, device=pred.device)
+    probs = F.softmax(pred, dim=1)
+    expected_bin = (probs * bin_index).sum(dim=1)
+    return expected_bin * 3 - 97.5
+
+
+def get_rotation_matrix(pitch_, yaw_, roll_):
+    """ Build batched 3x3 rotation matrices from pitch, yaw and roll given in degrees.
+
+    The three per-axis matrices are combined as rot_z @ rot_y @ rot_x and the
+    transpose of that product is returned, shaped (bs, 3, 3).
+    """
+    # degrees -> radians
+    x = pitch_ / 180 * PI
+    y = yaw_ / 180 * PI
+    z = roll_ / 180 * PI
+
+    device = x.device
+
+    # 1-D inputs are given a trailing axis so every angle is a (bs, 1) column.
+    if x.ndim == 1:
+        x = x.unsqueeze(1)
+    if y.ndim == 1:
+        y = y.unsqueeze(1)
+    if z.ndim == 1:
+        z = z.unsqueeze(1)
+
+    bs = x.shape[0]
+    ones = torch.ones([bs, 1]).to(device)
+    zeros = torch.zeros([bs, 1]).to(device)
+
+    def _as_matrix(*entries):
+        # nine (bs, 1) columns, row-major -> (bs, 3, 3)
+        return torch.cat(entries, dim=1).reshape([bs, 3, 3])
+
+    rot_x = _as_matrix(
+        ones, zeros, zeros,
+        zeros, torch.cos(x), -torch.sin(x),
+        zeros, torch.sin(x), torch.cos(x),
+    )
+    rot_y = _as_matrix(
+        torch.cos(y), zeros, torch.sin(y),
+        zeros, ones, zeros,
+        -torch.sin(y), zeros, torch.cos(y),
+    )
+    rot_z = _as_matrix(
+        torch.cos(z), -torch.sin(z), zeros,
+        torch.sin(z), torch.cos(z), zeros,
+        zeros, zeros, ones,
+    )
+
+    combined = rot_z @ rot_y @ rot_x
+    return combined.permute(0, 2, 1)  # transpose
diff --git a/subprocess/LivePortrait/src/utils/crop.py b/subprocess/LivePortrait/src/utils/crop.py
new file mode 100644
index 0000000000000000000000000000000000000000..065b9f0f9f25be8444b7c9bfca45652f80f5685b
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/crop.py
@@ -0,0 +1,398 @@
+# coding: utf-8
+
+"""
+cropping function and the related preprocess functions for cropping
+"""
+
+import numpy as np
+import os.path as osp
+from math import sin, cos, acos, degrees
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False) # NOTE: enforce single thread
+from .rprint import rprint as print
+
+DTYPE = np.float32
+CV2_INTERP = cv2.INTER_LINEAR
+
+def make_abs_path(fn):
+    """Join `fn` onto the directory containing this source file (real path)."""
+    module_dir = osp.dirname(osp.realpath(__file__))
+    return osp.join(module_dir, fn)
+
+def _transform_img(img, M, dsize, flags=CV2_INTERP, borderMode=None):
+    """Warp `img` with the top two rows of `M` using cv2.warpAffine.
+
+    img: image array handed to cv2.warpAffine
+    M: 2x3 matrix or 3x3 matrix (only M[:2, :] is used)
+    dsize: target shape (width, height); a single value n is expanded to (n, n)
+    flags: interpolation flag forwarded to cv2.warpAffine
+    borderMode: when given, forwarded together with a black borderValue;
+        when None, cv2's own border defaults apply
+    """
+    out_size = tuple(dsize) if isinstance(dsize, (tuple, list)) else (dsize, dsize)
+    affine = M[:2, :]
+
+    if borderMode is None:
+        return cv2.warpAffine(img, affine, dsize=out_size, flags=flags)
+    return cv2.warpAffine(
+        img, affine, dsize=out_size, flags=flags,
+        borderMode=borderMode, borderValue=(0, 0, 0),
+    )
+
+
+def _transform_pts(pts, M):
+    """Apply a similarity/affine transform to a set of 2D points.
+
+    pts: Nx2 ndarray
+    M: 2x3 matrix or 3x3 matrix (only the top two rows are used)
+    return: Nx2
+    """
+    linear = M[:2, :2]
+    translation = M[:2, 2]
+    return pts @ linear.T + translation
+
+
+def parse_pt2_from_pt101(pt101, use_lip=True):
+    """Reduce a 101-point landmark array to two reference points.
+
+    Each eye point is the mean of four landmark rows (interpolated rather
+    than a single eye-center landmark, which the original author found less
+    robust). With use_lip the result is [eye midpoint, midpoint of rows 75
+    and 81]; otherwise it is [left eye, right eye]. Returns a 2x2 array.
+    """
+    left_eye = pt101[[39, 42, 45, 48]].mean(axis=0)
+    right_eye = pt101[[51, 54, 57, 60]].mean(axis=0)
+
+    if not use_lip:
+        return np.stack([left_eye, right_eye], axis=0)
+
+    eye_mid = (left_eye + right_eye) / 2
+    lip_mid = (pt101[75] + pt101[81]) / 2
+    return np.stack([eye_mid, lip_mid], axis=0)
+
+
+def parse_pt2_from_pt106(pt106, use_lip=True):
+    """Reduce a 106-point landmark array to two reference points.
+
+    Each eye point is the mean of four landmark rows. With use_lip the result
+    is [eye midpoint, midpoint of rows 52 and 61]; otherwise it is
+    [left eye, right eye]. Returns a 2x2 array.
+    """
+    left_eye = pt106[[33, 35, 40, 39]].mean(axis=0)
+    right_eye = pt106[[87, 89, 94, 93]].mean(axis=0)
+
+    if not use_lip:
+        return np.stack([left_eye, right_eye], axis=0)
+
+    eye_mid = (left_eye + right_eye) / 2
+    lip_mid = (pt106[52] + pt106[61]) / 2
+    return np.stack([eye_mid, lip_mid], axis=0)
+
+
+def parse_pt2_from_pt203(pt203, use_lip=True):
+    """Reduce a 203-point landmark array to two reference points.
+
+    Each eye point is the mean of four landmark rows. With use_lip the result
+    is [eye midpoint, midpoint of rows 48 and 66]; otherwise it is
+    [left eye, right eye]. Returns a 2x2 array.
+    """
+    left_eye = pt203[[0, 6, 12, 18]].mean(axis=0)
+    right_eye = pt203[[24, 30, 36, 42]].mean(axis=0)
+
+    if not use_lip:
+        return np.stack([left_eye, right_eye], axis=0)
+
+    eye_mid = (left_eye + right_eye) / 2
+    lip_mid = (pt203[48] + pt203[66]) / 2
+    return np.stack([eye_mid, lip_mid], axis=0)
+
+
+def parse_pt2_from_pt68(pt68, use_lip=True):
+    """Reduce a 68-point landmark array to two reference points.
+
+    The index table is written 1-based and shifted to 0-based. Each eye is
+    the mean of two landmark rows. With use_lip the result is
+    [eye midpoint, midpoint of the two lip rows]; otherwise it is
+    [left eye, right eye]. Returns a 2x2 array.
+    """
+    idx = np.array([31, 37, 40, 43, 46, 49, 55], dtype=np.int32) - 1
+    left_eye = np.mean(pt68[idx[[1, 2]], :], 0)
+    right_eye = np.mean(pt68[idx[[3, 4]], :], 0)
+
+    if not use_lip:
+        return np.stack([left_eye, right_eye], axis=0)
+
+    lip_a = pt68[idx[5], :]
+    lip_b = pt68[idx[6], :]
+    return np.stack([
+        (left_eye + right_eye) / 2,
+        (lip_a + lip_b) / 2,
+    ], axis=0)
+
+
+def parse_pt2_from_pt5(pt5, use_lip=True):
+    """Reduce a 5-point landmark array to two reference points.
+
+    With use_lip the result is [midpoint of rows 0 and 1, midpoint of rows 3
+    and 4]; otherwise rows 0 and 1 are returned as they are. Returns a 2x2
+    array.
+    """
+    if use_lip:
+        first = (pt5[0] + pt5[1]) / 2
+        second = (pt5[3] + pt5[4]) / 2
+    else:
+        first, second = pt5[0], pt5[1]
+    return np.stack([first, second], axis=0)
+
+
+def parse_pt2_from_pt_x(pts, use_lip=True):
+    """Pick the 2-point parser that matches the number of landmarks in `pts`.
+
+    Exact counts 101, 106, 68, 5 and 203 have dedicated parsers. Any other
+    count above 101 is parsed from its first 101 rows. Anything else raises
+    an Exception. When use_lip is False, the second point is replaced in place
+    so that the pair is rotated by 90 degrees around the first point, which
+    the downstream code expects.
+    """
+    n_pts = pts.shape[0]
+    exact_parsers = {
+        101: parse_pt2_from_pt101,
+        106: parse_pt2_from_pt106,
+        68: parse_pt2_from_pt68,
+        5: parse_pt2_from_pt5,
+        203: parse_pt2_from_pt203,
+    }
+
+    parser = exact_parsers.get(n_pts)
+    if parser is not None:
+        pt2 = parser(pts, use_lip=use_lip)
+    elif n_pts > 101:
+        # take the first 101 points
+        pt2 = parse_pt2_from_pt101(pts[:101], use_lip=use_lip)
+    else:
+        raise Exception(f'Unknow shape: {pts.shape}')
+
+    if not use_lip:
+        # NOTE: to comply with the later code, rotate pt2 by 90 degrees clockwise manually
+        v = pt2[1] - pt2[0]
+        pt2[1, 0], pt2[1, 1] = pt2[0, 0] - v[1], pt2[0, 1] + v[0]
+
+    return pt2
+
+
+def parse_rect_from_landmark(
+    pts,
+    scale=1.5,
+    need_square=True,
+    vx_ratio=0,
+    vy_ratio=0,
+    use_deg_flag=False,
+    **kwargs
+):
+    """Parse center, size and angle of an oriented rectangle from 101/68/5/x landmarks.
+
+    pts: Nx2 landmarks; the parser is chosen from pts.shape[0]
+    scale: factor applied to the rectangle size
+    need_square: if True, both sides are set to the longer one
+    vx_ratio: offset ratio along the rectangle's x-axis, multiplied by the width
+    vy_ratio: offset ratio along the rectangle's y-axis, multiplied by the height,
+        which is used to contain more forehead area
+    use_deg_flag: return the angle in degrees instead of radians
+    kwargs: only `use_lip` (default True) is read
+
+    Returns (center, size, angle): center and size are length-2 arrays.
+    """
+    pt2 = parse_pt2_from_pt_x(pts, use_lip=kwargs.get('use_lip', True))
+
+    uy = pt2[1] - pt2[0]
+    length = np.linalg.norm(uy)
+    if length <= 1e-3:
+        # degenerate reference pair: fall back to the image y-axis
+        uy = np.array([0, 1], dtype=DTYPE)
+    else:
+        # out-of-place division: an in-place `/=` raises for integer-typed points
+        uy = uy / length
+    ux = np.array((uy[1], -uy[0]), dtype=DTYPE)
+
+    # the rotation degree of the x-axis, the clockwise is positive, the
+    # counterclockwise is negative (image coordinate system).
+    # Clamp before acos so rounding can never push the argument outside [-1, 1].
+    angle = acos(min(1.0, max(-1.0, float(ux[0]))))
+    if ux[1] < 0:
+        angle = -angle
+
+    # rotation matrix
+    M = np.array([ux, uy])
+
+    # calculate the size which contains the angle degree of the bbox, and the center
+    center0 = np.mean(pts, axis=0)
+    rpts = (pts - center0) @ M.T  # (M @ P.T).T = P @ M.T
+    lt_pt = np.min(rpts, axis=0)
+    rb_pt = np.max(rpts, axis=0)
+    center1 = (lt_pt + rb_pt) / 2
+
+    size = rb_pt - lt_pt
+    if need_square:
+        size[:] = max(size[0], size[1])
+
+    size *= scale  # scale size
+    center = center0 + ux * center1[0] + uy * center1[1]  # counterclockwise rotation, equivalent to M.T @ center1.T
+
+    # Offset along each axis by that axis' own extent. Multiplying the axis
+    # vectors element-wise by (width, height) would bend their direction
+    # whenever the rectangle is not square; for square rectangles the two
+    # forms are identical.
+    center = center + ux * (vx_ratio * size[[0, 0]]) + uy * (vy_ratio * size[[1, 1]])
+
+    if use_deg_flag:
+        angle = degrees(angle)
+
+    return center, size, angle
+
+
+def parse_bbox_from_landmark(pts, **kwargs):
+    """Compute an axis-aligned box and its rotated counterpart from landmarks.
+
+    All keyword arguments are forwarded to parse_rect_from_landmark.
+
+    Returns a dict with:
+        center: length-2 array
+        size: length-2 array (width, height)
+        angle: as returned by parse_rect_from_landmark (degrees when
+            use_deg_flag=True was passed, radians otherwise)
+        bbox: 4x2 corners before rotation (left-top, right-top, right-bottom, left-bottom)
+        bbox_rot: 4x2 corners rotated by `angle` around the center
+    """
+    center, size, angle = parse_rect_from_landmark(pts, **kwargs)
+    cx, cy = center
+    w, h = size
+
+    # calculate the vertex positions before rotation
+    bbox = np.array([
+        [cx-w/2, cy-h/2],  # left, top
+        [cx+w/2, cy-h/2],
+        [cx+w/2, cy+h/2],  # right, bottom
+        [cx-w/2, cy+h/2]
+    ], dtype=DTYPE)
+
+    # The trig functions need radians; the angle comes back in degrees when
+    # the caller forwarded use_deg_flag=True.
+    theta = np.radians(angle) if kwargs.get('use_deg_flag', False) else angle
+
+    # construct rotation matrix
+    R = np.array([
+        [np.cos(theta), -np.sin(theta)],
+        [np.sin(theta), np.cos(theta)]
+    ], dtype=DTYPE)
+
+    # take each vertex relative to the rotation center, rotate it, then add
+    # the rotation center back
+    bbox_rot = (bbox - center) @ R.T + center
+
+    return {
+        'center': center,  # length-2
+        'size': size,  # length-2 (w, h)
+        'angle': angle,  # unit follows use_deg_flag
+        'bbox': bbox,  # 4x2
+        'bbox_rot': bbox_rot,  # 4x2
+    }
+
+
+def crop_image_by_bbox(img, bbox, lmk=None, dsize=512, angle=None, flag_rot=False, **kwargs):
+    """Crop `img` to `bbox` and resample the crop to dsize x dsize.
+
+    bbox: (left, top, right, bottom); only the width sets the scale, and a
+        message is printed when width and height differ after int truncation
+    lmk: optional Nx2 landmarks, mapped into the crop when given
+    angle / flag_rot: the rotation is applied only when flag_rot is true and
+        an angle is supplied
+    kwargs: only `borderMode` is read and forwarded to the warp
+
+    Returns a dict with img_crop, lmk_crop (or None), and the 3x3 matrices
+    M_o2c (original -> crop) and M_c2o (its inverse).
+    """
+    left, top, right, bot = bbox
+    if int(right - left) != int(bot - top):
+        print(f'right-left {right-left} != bot-top {bot-top}')
+    side = right - left
+    s = dsize / side  # scale
+
+    src_center = np.array([(left + right) / 2, (top + bot) / 2], dtype=DTYPE)
+    tgt_center = np.array([dsize / 2, dsize / 2], dtype=DTYPE)
+
+    use_rotation = flag_rot and angle is not None
+    if not use_rotation:
+        rows = [
+            [s, 0, tgt_center[0] - s * src_center[0]],
+            [0, s, tgt_center[1] - s * src_center[1]],
+        ]
+    else:
+        costheta, sintheta = cos(angle), sin(angle)
+        cx, cy = src_center[0], src_center[1]  # ori center
+        tcx, tcy = tgt_center[0], tgt_center[1]  # target center
+        rows = [
+            [s * costheta, s * sintheta, tcx - s * (costheta * cx + sintheta * cy)],
+            [-s * sintheta, s * costheta, tcy - s * (-sintheta * cx + costheta * cy)],
+        ]
+    affine_o2c = np.array(rows, dtype=DTYPE)
+
+    img_crop = _transform_img(img, affine_o2c, dsize=dsize, borderMode=kwargs.get('borderMode', None))
+    lmk_crop = None if lmk is None else _transform_pts(lmk, affine_o2c)
+
+    # promote to homogeneous 3x3 so the inverse mapping can be computed
+    M_o2c = np.vstack([affine_o2c, np.array([0, 0, 1], dtype=DTYPE)])
+    M_c2o = np.linalg.inv(M_o2c)
+
+    return {
+        'img_crop': img_crop,
+        'lmk_crop': lmk_crop,
+        'M_o2c': M_o2c,
+        'M_c2o': M_c2o,
+    }
+
+
+def _estimate_similar_transform_from_pts(
+    pts,
+    dsize,
+    scale=1.5,
+    vx_ratio=0,
+    vy_ratio=-0.1,
+    flag_do_rot=True,
+    **kwargs
+):
+    """Estimate the crop transform from sparse landmarks.
+
+    pts: landmark, 101 or 68 points or other points, Nx2
+    dsize: side length of the square crop
+    scale: the larger scale factor, the smaller face ratio
+    vx_ratio: x shift
+    vy_ratio: y shift, the smaller the y shift, the lower the face region
+    flag_do_rot: if true, the crop is rotated by the parsed angle
+    kwargs: only `use_lip` (default True) is read
+
+    Returns (M_INV, M): M_INV is the 2x3 matrix from the original image to
+    the cropped image, M is the top two rows of its homogeneous inverse
+    (cropped image back to the original image).
+    """
+    center, size, angle = parse_rect_from_landmark(
+        pts, scale=scale, vx_ratio=vx_ratio, vy_ratio=vy_ratio,
+        use_lip=kwargs.get('use_lip', True)
+    )
+
+    s = dsize / size[0]  # scale
+    tgt_center = np.array([dsize / 2, dsize / 2], dtype=DTYPE)  # center of dsize
+
+    if not flag_do_rot:
+        rows = [
+            [s, 0, tgt_center[0] - s * center[0]],
+            [0, s, tgt_center[1] - s * center[1]],
+        ]
+    else:
+        costheta, sintheta = cos(angle), sin(angle)
+        cx, cy = center[0], center[1]  # ori center
+        tcx, tcy = tgt_center[0], tgt_center[1]  # target center
+        rows = [
+            [s * costheta, s * sintheta, tcx - s * (costheta * cx + sintheta * cy)],
+            [-s * sintheta, s * costheta, tcy - s * (-sintheta * cx + costheta * cy)],
+        ]
+    M_INV = np.array(rows, dtype=DTYPE)
+
+    # homogeneous form is needed only to invert the mapping
+    homogeneous = np.vstack([M_INV, np.array([0, 0, 1])])
+    M = np.linalg.inv(homogeneous)
+
+    return M_INV, M[:2, ...]
+
+
+def crop_image(img, pts: np.ndarray, **kwargs):
+    """Crop a face region from `img` using its landmarks `pts`.
+
+    Recognised kwargs:
+        dsize: side length of the square crop (default 224)
+        scale: crop scale factor (default 1.5)
+        vx_ratio: horizontal shift ratio (default 0)
+        vy_ratio: vertical shift ratio (default -0.1)
+        flag_do_rot: rotate the crop by the parsed angle (default True)
+
+    Returns a dict with the 3x3 matrices M_o2c / M_c2o, the cropped image
+    img_crop and the landmarks mapped into the crop, pt_crop.
+    """
+    dsize = kwargs.get('dsize', 224)
+    scale = kwargs.get('scale', 1.5)  # 1.5 | 1.6
+    # vx_ratio used to be dropped here even though callers pass it; forward it.
+    vx_ratio = kwargs.get('vx_ratio', 0)
+    vy_ratio = kwargs.get('vy_ratio', -0.1)  # -0.0625 | -0.1
+
+    M_INV, _ = _estimate_similar_transform_from_pts(
+        pts,
+        dsize=dsize,
+        scale=scale,
+        vx_ratio=vx_ratio,
+        vy_ratio=vy_ratio,
+        flag_do_rot=kwargs.get('flag_do_rot', True),
+    )
+
+    img_crop = _transform_img(img, M_INV, dsize)  # origin to crop
+    pt_crop = _transform_pts(pts, M_INV)
+
+    M_o2c = np.vstack([M_INV, np.array([0, 0, 1], dtype=DTYPE)])
+    M_c2o = np.linalg.inv(M_o2c)
+
+    ret_dct = {
+        'M_o2c': M_o2c,  # from the original image to the cropped image 3x3
+        'M_c2o': M_c2o,  # from the cropped image to the original image 3x3
+        'img_crop': img_crop,  # the cropped image
+        'pt_crop': pt_crop,  # the landmarks of the cropped image
+    }
+
+    return ret_dct
+
+def average_bbox_lst(bbox_lst):
+    """Return the element-wise mean of the boxes as a list, or None if there are none."""
+    if not len(bbox_lst):
+        return None
+    stacked = np.array(bbox_lst)
+    return stacked.mean(axis=0).tolist()
+
+def prepare_paste_back(mask_crop, crop_M_c2o, dsize):
+    """Warp the crop-space mask with `crop_M_c2o` and rescale it by 1/255 as float32.
+
+    The result is the blending mask later consumed when pasting a crop back.
+    """
+    warped_mask = _transform_img(mask_crop, crop_M_c2o, dsize)
+    return warped_mask.astype(np.float32) / 255.
+
+def paste_back(img_crop, M_c2o, img_ori, mask_ori):
+    """Warp `img_crop` into the frame of `img_ori` and blend the two with `mask_ori`.
+
+    The blend is mask * warped + (1 - mask) * original, clipped to [0, 255]
+    and returned as uint8.
+    """
+    height, width = img_ori.shape[0], img_ori.shape[1]
+    warped = _transform_img(img_crop, M_c2o, dsize=(width, height))
+    blended = mask_ori * warped + (1 - mask_ori) * img_ori
+    return np.clip(blended, 0, 255).astype(np.uint8)
diff --git a/subprocess/LivePortrait/src/utils/cropper.py b/subprocess/LivePortrait/src/utils/cropper.py
new file mode 100644
index 0000000000000000000000000000000000000000..81fe74d4cfe822b8a008e0dab88ec7fb75a52558
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/cropper.py
@@ -0,0 +1,183 @@
+# coding: utf-8
+
+import numpy as np
+import os.path as osp
+from typing import List, Union, Tuple
+from dataclasses import dataclass, field
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+
+from ..config.crop_config import CropConfig
+from .landmark_runner import LandmarkRunner
+from .face_analysis_diy import FaceAnalysisDIY
+from .crop import crop_image, crop_image_by_bbox, parse_bbox_from_landmark, average_bbox_lst
+from .rprint import rlog as log
+from .io import contiguous
+
+
+def make_abs_path(fn):
+    """Join `fn` onto the directory containing this source file (real path)."""
+    module_dir = osp.dirname(osp.realpath(__file__))
+    return osp.join(module_dir, fn)
+
+
+# Annotation shared by every list-like field of Trajectory.
+_SeqLike = Union[Tuple, List, np.ndarray]
+
+
+@dataclass
+class Trajectory:
+    """Per-video tracking state: frame range plus per-frame landmarks, boxes and crops."""
+
+    # index of the first / last tracked frame; -1 means "not started yet"
+    start: int = -1
+    end: int = -1
+
+    # per-frame landmarks and bounding boxes
+    lmk_lst: _SeqLike = field(default_factory=list)
+    bbox_lst: _SeqLike = field(default_factory=list)
+
+    # per-frame RGB frames
+    frame_rgb_lst: _SeqLike = field(default_factory=list)
+
+    # per-frame landmarks and frames after cropping
+    lmk_crop_lst: _SeqLike = field(default_factory=list)
+    frame_rgb_crop_lst: _SeqLike = field(default_factory=list)
+
+
+class Cropper(object):
+    """Face cropping helper combining a face detector wrapper and a landmark runner."""
+
+    def __init__(self, **kwargs) -> None:
+        """Create and warm up the landmark runner and the face-analysis wrapper.
+
+        Recognised kwargs:
+            device_id: device index handed to both models (default 0)
+            flag_force_cpu: use the CPU providers instead of CUDA (default False)
+            crop_cfg: CropConfig stored on the instance (default None)
+        """
+        device_id = kwargs.get('device_id', 0)
+        flag_force_cpu = kwargs.get('flag_force_cpu', False)
+        if flag_force_cpu:
+            device = 'cpu'
+            face_analysis_providers = ['CPUExecutionProvider']
+        else:
+            device = 'cuda'
+            face_analysis_providers = ["CUDAExecutionProvider"]
+        self.landmark_runner = LandmarkRunner(
+            ckpt_path=make_abs_path('../../pretrained_weights/liveportrait/landmark.onnx'),
+            onnx_provider=device,
+            device_id=device_id
+        )
+        self.landmark_runner.warmup()
+
+        self.face_analysis_wrapper = FaceAnalysisDIY(
+            name='buffalo_l',
+            root=make_abs_path('../../pretrained_weights/insightface'),
+            providers=face_analysis_providers
+        )
+        self.face_analysis_wrapper.prepare(ctx_id=device_id, det_size=(512, 512))
+        self.face_analysis_wrapper.warmup()
+
+        self.crop_cfg: CropConfig = kwargs.get('crop_cfg', None)
+
+    def update_config(self, user_args):
+        """Copy onto self.crop_cfg every entry of `user_args` whose key is already an attribute of it."""
+        for k, v in user_args.items():
+            if hasattr(self.crop_cfg, k):
+                setattr(self.crop_cfg, k, v)
+
+    def crop_source_image(self, img_rgb_: np.ndarray, crop_cfg: CropConfig):
+        """Detect a face in an RGB source image and crop it.
+
+        Returns None when no face is detected. Otherwise returns the dict
+        produced by crop_image, extended with `lmk_crop` (output of the
+        landmark runner), `img_crop_256x256` and `lmk_crop_256x256`.
+        """
+        # crop a source image and get necessary information
+        img_rgb = img_rgb_.copy()  # copy it
+
+        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
+        src_face = self.face_analysis_wrapper.get(
+            img_bgr,
+            flag_do_landmark_2d_106=True,
+            direction=crop_cfg.direction,
+            max_face_num=crop_cfg.max_face_num,
+        )
+
+        if len(src_face) == 0:
+            log('No face detected in the source image.')
+            return None
+        elif len(src_face) > 1:
+            log(f'More than one face detected in the image, only pick one face by rule {crop_cfg.direction}.')
+
+        # NOTE: temporarily only pick the first face, to support multiple face in the future
+        src_face = src_face[0]
+        lmk = src_face.landmark_2d_106  # this is the 106 landmarks from insightface
+
+        # crop the face
+        ret_dct = crop_image(
+            img_rgb,  # ndarray
+            lmk,  # 106x2 or Nx2
+            dsize=crop_cfg.dsize,
+            scale=crop_cfg.scale,
+            vx_ratio=crop_cfg.vx_ratio,
+            vy_ratio=crop_cfg.vy_ratio,
+        )
+
+        lmk = self.landmark_runner.run(img_rgb, lmk)
+        ret_dct['lmk_crop'] = lmk
+
+        # update a 256x256 version for network input
+        ret_dct['img_crop_256x256'] = cv2.resize(ret_dct['img_crop'], (256, 256), interpolation=cv2.INTER_AREA)
+        ret_dct['lmk_crop_256x256'] = ret_dct['lmk_crop'] * 256 / crop_cfg.dsize
+
+        return ret_dct
+
+    def crop_driving_video(self, driving_rgb_lst, **kwargs):
+        """Tracking based landmarks/alignment and cropping.
+
+        Frames are skipped until the first frame with a detected face; from
+        then on each frame's landmarks are seeded by the previous frame's.
+        All tracked frames are cropped with the mean of their bounding boxes.
+
+        Recognised kwargs: direction (default 'large-small'), dsize (default 512).
+        Returns a dict with `frame_crop_lst` and `lmk_crop_lst`.
+        """
+        trajectory = Trajectory()
+        direction = kwargs.get('direction', 'large-small')
+        for idx, frame_rgb in enumerate(driving_rgb_lst):
+            if idx == 0 or trajectory.start == -1:
+                src_face = self.face_analysis_wrapper.get(
+                    contiguous(frame_rgb[..., ::-1]),
+                    flag_do_landmark_2d_106=True,
+                    direction=direction
+                )
+                if len(src_face) == 0:
+                    log(f'No face detected in the frame #{idx}')
+                    continue
+                elif len(src_face) > 1:
+                    log(f'More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}.')
+                src_face = src_face[0]
+                lmk = src_face.landmark_2d_106
+                lmk = self.landmark_runner.run(frame_rgb, lmk)
+                trajectory.start, trajectory.end = idx, idx
+            else:
+                lmk = self.landmark_runner.run(frame_rgb, trajectory.lmk_lst[-1])
+                trajectory.end = idx
+
+            trajectory.lmk_lst.append(lmk)
+            # The horizontal ratio must be passed as `vx_ratio`; any other
+            # keyword name is swallowed by **kwargs and silently ignored.
+            ret_bbox = parse_bbox_from_landmark(
+                lmk,
+                scale=self.crop_cfg.scale_crop_video,
+                vx_ratio=self.crop_cfg.vx_ratio_crop_video,
+                vy_ratio=self.crop_cfg.vy_ratio_crop_video,
+            )['bbox']
+            bbox = [ret_bbox[0, 0], ret_bbox[0, 1], ret_bbox[2, 0], ret_bbox[2, 1]]  # 4,
+            trajectory.bbox_lst.append(bbox)  # bbox
+            trajectory.frame_rgb_lst.append(frame_rgb)
+
+        global_bbox = average_bbox_lst(trajectory.bbox_lst)
+
+        for idx, (frame_rgb, lmk) in enumerate(zip(trajectory.frame_rgb_lst, trajectory.lmk_lst)):
+            ret_dct = crop_image_by_bbox(
+                frame_rgb,
+                global_bbox,
+                lmk=lmk,
+                dsize=kwargs.get('dsize', 512),
+                flag_rot=False,
+                borderValue=(0, 0, 0),
+            )
+            trajectory.frame_rgb_crop_lst.append(ret_dct['img_crop'])
+            trajectory.lmk_crop_lst.append(ret_dct['lmk_crop'])
+
+        return {
+            'frame_crop_lst': trajectory.frame_rgb_crop_lst,
+            'lmk_crop_lst': trajectory.lmk_crop_lst,
+        }
+
+    def calc_lmks_from_cropped_video(self, driving_rgb_crop_lst, **kwargs):
+        """Tracking based landmarks/alignment on already cropped frames.
+
+        Raises an Exception when detection finds no face in a frame that
+        needs it. Recognised kwargs: direction (default 'large-small').
+        Returns the list of per-frame landmarks.
+        """
+        trajectory = Trajectory()
+        direction = kwargs.get('direction', 'large-small')
+
+        for idx, frame_rgb_crop in enumerate(driving_rgb_crop_lst):
+            if idx == 0 or trajectory.start == -1:
+                src_face = self.face_analysis_wrapper.get(
+                    contiguous(frame_rgb_crop[..., ::-1]),  # convert to BGR
+                    flag_do_landmark_2d_106=True,
+                    direction=direction
+                )
+                if len(src_face) == 0:
+                    log(f'No face detected in the frame #{idx}')
+                    raise Exception(f'No face detected in the frame #{idx}')
+                elif len(src_face) > 1:
+                    log(f'More than one face detected in the driving frame_{idx}, only pick one face by rule {direction}.')
+                src_face = src_face[0]
+                lmk = src_face.landmark_2d_106
+                lmk = self.landmark_runner.run(frame_rgb_crop, lmk)
+                trajectory.start, trajectory.end = idx, idx
+            else:
+                lmk = self.landmark_runner.run(frame_rgb_crop, trajectory.lmk_lst[-1])
+                trajectory.end = idx
+
+            trajectory.lmk_lst.append(lmk)
+        return trajectory.lmk_lst
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/__init__.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1680083da47850b31da10803c7d255e67dda619a
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/__init__.py
@@ -0,0 +1,20 @@
+# coding: utf-8
+# pylint: disable=wrong-import-position
+"""InsightFace: A Face Analysis Toolkit."""
+from __future__ import absolute_import
+
+try:
+ #import mxnet as mx
+ import onnxruntime
+except ImportError:
+ raise ImportError(
+ "Unable to import dependency onnxruntime. "
+ )
+
+__version__ = '0.7.3'
+
+from . import model_zoo
+from . import utils
+from . import app
+from . import data
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..96ddc76c2a1c2c28d227153505b3d68689adfd12
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91c8c652d2e12d21dfaeb5258071ac6f9d816109
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__init__.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc574616885290489798bac5c682e7aaa65a5dad
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__init__.py
@@ -0,0 +1 @@
+from .face_analysis import *
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e0b6b8af4332244a67a738907a1cf2fd76de817
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a172f74cdb808a6e78124d2a4c8b092ee50576c5
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4496283ca1ea4d0d6d785520378d7f5682ecf827
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d6c828aa6386f1cb02133c6050110b60258566c
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/common.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1f9368d2e9c43c28e8d750678aacb70b814e5822
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..73923994f78aaedf18d55454e47da4aedbdaa0ab
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/__pycache__/face_analysis.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/common.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..82ca987aeede35510b3aef72b4edf2390ad84e65
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/common.py
@@ -0,0 +1,49 @@
+import numpy as np
+from numpy.linalg import norm as l2norm
+#from easydict import EasyDict
+
+class Face(dict):
+    """Dict whose entries are mirrored as attributes.
+
+    Setting an attribute or an item stores the value both ways. Reading an
+    attribute that was never set yields None instead of raising. Nested
+    dicts (also inside lists/tuples) are wrapped in this class on assignment,
+    and tuples are stored as lists.
+    """
+
+    def __init__(self, d=None, **kwargs):
+        # Work on a copy so that merging kwargs never mutates the caller's dict.
+        data = {} if d is None else dict(d)
+        if kwargs:
+            data.update(kwargs)
+        for k, v in data.items():
+            setattr(self, k, v)
+
+    def __setattr__(self, name, value):
+        if isinstance(value, (list, tuple)):
+            value = [self.__class__(x)
+                     if isinstance(x, dict) else x for x in value]
+        elif isinstance(value, dict) and not isinstance(value, self.__class__):
+            value = self.__class__(value)
+        # keep the attribute view and the mapping view in sync
+        super(Face, self).__setattr__(name, value)
+        super(Face, self).__setitem__(name, value)
+
+    __setitem__ = __setattr__
+
+    def __getattr__(self, name):
+        # only reached when normal lookup fails: unknown attributes read as None
+        return None
+
+    @property
+    def embedding_norm(self):
+        """L2 norm of `embedding`, or None when no embedding is set."""
+        if self.embedding is None:
+            return None
+        return l2norm(self.embedding)
+
+    @property
+    def normed_embedding(self):
+        """`embedding` divided by its L2 norm, or None when no embedding is set."""
+        if self.embedding is None:
+            return None
+        return self.embedding / self.embedding_norm
+
+    @property
+    def sex(self):
+        """'M' when gender == 1, 'F' for any other gender value, None when gender is unset."""
+        if self.gender is None:
+            return None
+        return 'M' if self.gender==1 else 'F'
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/app/face_analysis.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/face_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa5128b3f5e02c2c19e7df195cc1c1e7fcf36c4d
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/app/face_analysis.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-05-04
+# @Function :
+
+
+from __future__ import division
+
+import glob
+import os.path as osp
+
+import numpy as np
+import onnxruntime
+from numpy.linalg import norm
+
+from ..model_zoo import model_zoo
+from ..utils import ensure_available
+from .common import Face
+
+
+DEFAULT_MP_NAME = 'buffalo_l'
+__all__ = ['FaceAnalysis']
+
+class FaceAnalysis:
+    """Loads every ONNX model of a model pack and runs them on detected faces."""
+
+    def __init__(self, name=DEFAULT_MP_NAME, root='~/.insightface', allowed_modules=None, **kwargs):
+        """Load the models found in the pack directory, keeping one model per task name.
+
+        name / root: forwarded to ensure_available to locate the model directory
+        allowed_modules: optional collection of task names to keep; others are skipped
+        kwargs: forwarded to model_zoo.get_model for every ONNX file
+
+        Raises AssertionError when no 'detection' model was loaded.
+        """
+        onnxruntime.set_default_logger_severity(3)
+        self.models = {}
+        self.model_dir = ensure_available('models', name, root=root)
+        onnx_files = sorted(glob.glob(osp.join(self.model_dir, '*.onnx')))
+        for onnx_file in onnx_files:
+            model = model_zoo.get_model(onnx_file, **kwargs)
+            if model is None:
+                print('model not recognized:', onnx_file)
+            elif allowed_modules is not None and model.taskname not in allowed_modules:
+                print('model ignore:', onnx_file, model.taskname)
+                del model
+            elif model.taskname not in self.models:
+                # the allowed_modules filter was already applied by the branch above
+                self.models[model.taskname] = model
+            else:
+                print('duplicated model task type, ignore:', onnx_file, model.taskname)
+                del model
+        assert 'detection' in self.models
+        self.det_model = self.models['detection']
+
+    def prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640)):
+        """Prepare every loaded model; the detector also receives input size and threshold."""
+        self.det_thresh = det_thresh
+        assert det_size is not None
+        self.det_size = det_size
+        for taskname, model in self.models.items():
+            if taskname=='detection':
+                model.prepare(ctx_id, input_size=det_size, det_thresh=det_thresh)
+            else:
+                model.prepare(ctx_id)
+
+    def get(self, img, max_num=0):
+        """Detect faces in `img` and run every non-detection model on each of them.
+
+        Returns a list of Face objects (empty when nothing is detected).
+        """
+        bboxes, kpss = self.det_model.detect(img,
+                                             max_num=max_num,
+                                             metric='default')
+        if bboxes.shape[0] == 0:
+            return []
+        ret = []
+        for i in range(bboxes.shape[0]):
+            bbox = bboxes[i, 0:4]
+            det_score = bboxes[i, 4]
+            kps = None
+            if kpss is not None:
+                kps = kpss[i]
+            face = Face(bbox=bbox, kps=kps, det_score=det_score)
+            for taskname, model in self.models.items():
+                if taskname=='detection':
+                    continue
+                model.get(img, face)
+            ret.append(face)
+        return ret
+
+    def draw_on(self, img, faces):
+        """Return a copy of `img` with boxes, keypoints and (if present) sex/age text drawn."""
+        import cv2
+        dimg = img.copy()
+        for face in faces:
+            # builtin int: the np.int alias no longer exists in current NumPy
+            box = face.bbox.astype(int)
+            color = (0, 0, 255)
+            cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2)
+            if face.kps is not None:
+                kps = face.kps.astype(int)
+                for l in range(kps.shape[0]):
+                    color = (0, 0, 255)
+                    if l == 0 or l == 3:
+                        color = (0, 255, 0)
+                    cv2.circle(dimg, (kps[l][0], kps[l][1]), 1, color,
+                               2)
+            if face.gender is not None and face.age is not None:
+                cv2.putText(dimg,'%s,%d'%(face.sex,face.age), (box[0]-1, box[1]-4),cv2.FONT_HERSHEY_COMPLEX,0.7,(0,255,0),1)
+
+        return dimg
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__init__.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..665c59ec99b6ebf12822015e0350969c7903e243
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__init__.py
@@ -0,0 +1,2 @@
+from .image import get_image
+from .pickle_object import get_object
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0458745f1071c91320c4855a74331f554eabdf6a
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cf4c1e17e7bf3261fa3a3f81a5482f2d07d8a4f8
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef2dae902d902c7a118522cfe43ac2665dedb175
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ea67ece2b1d00fecbd29172fc37087e600da86c
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/image.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..25cc94712dcd59ba2e37557545864d9b242650cd
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7cbc22d0119eba6fd12c4b9aa0275bfc6c5d0688
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/__pycache__/pickle_object.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/image.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d32c4bcb1b13d33bcb0d840cf7b8c08d183b3ea
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/image.py
@@ -0,0 +1,50 @@
+import cv2
+import os
+import os.path as osp
+from pathlib import Path
+
+
+class ImageCache:
+    """Holder for the dict that ``get_image`` uses as its cache."""
+
+    # Keyed by (name, to_rgb); values are whatever ``get_image`` returned.
+    data = {}
+
+
+def get_image(name, to_rgb=False):
+    """Return the bundled image called ``name``, loading it on first use.
+
+    The file is looked up in the ``images`` directory next to this module,
+    trying the extensions ``.jpg``, ``.png`` and ``.jpeg`` in that order.
+
+    Parameters
+    ----------
+    name : str
+        Base file name without extension.
+    to_rgb : bool
+        If true, the last axis of the loaded array is reversed before the
+        result is cached and returned.
+
+    Returns
+    -------
+    The value produced by ``cv2.imread`` (channel-reversed when ``to_rgb``).
+
+    Raises
+    ------
+    AssertionError
+        If no file with one of the supported extensions exists.
+    """
+    key = (name, to_rgb)
+    if key in ImageCache.data:
+        return ImageCache.data[key]
+    images_dir = osp.join(Path(__file__).parent.absolute(), 'images')
+    candidates = (osp.join(images_dir, "%s%s" % (name, ext))
+                  for ext in ('.jpg', '.png', '.jpeg'))
+    image_file = next((path for path in candidates if osp.exists(path)), None)
+    assert image_file is not None, '%s not found'%name
+    img = cv2.imread(image_file)
+    if to_rgb:
+        img = img[:,:,::-1]
+    ImageCache.data[key] = img
+    return img
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/Tom_Hanks_54745.png b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/Tom_Hanks_54745.png
new file mode 100644
index 0000000000000000000000000000000000000000..906315d13fa29bb3a5ded3e162592f2c7f041b23
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/Tom_Hanks_54745.png differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_black.jpg b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_black.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0eab0df555c23f1e033537fe39f3c0c8303dd369
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_black.jpg differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_blue.jpg b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_blue.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f71336b9a0d3038ebd84e6995ebfbe54946fcbb4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_blue.jpg differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_green.jpg b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_green.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ac2ad55f4fc580c915dfa4c157ca3bfc84e453f4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_green.jpg differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_white.jpg b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_white.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2148ab2d09fdee6e3f59315470e98ecfc54339e4
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/mask_white.jpg differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/t1.jpg b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/t1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0d1d64a59675c9590fd12429db647eb169cecff8
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/images/t1.jpg differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/objects/meanshape_68.pkl b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/objects/meanshape_68.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d5297e9e8ea5574298ddd287b058252e03aa18c1
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/objects/meanshape_68.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39ffecf84ba73f0d0d7e49380833ba88713c9fcdec51df4f7ac45a48b8f4cc51
+size 974
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/pickle_object.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/pickle_object.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbd87030ea15e1d01af1cd4cff1be2bc54cc82dd
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/pickle_object.py
@@ -0,0 +1,30 @@
+import cv2
+import os
+import os.path as osp
+from pathlib import Path
+import pickle
+
+
+def get_object(name):
+    """Load a pickled object from the ``objects`` directory beside this module.
+
+    Parameters
+    ----------
+    name : str
+        File name of the object; ``.pkl`` is appended when it is missing.
+
+    Returns
+    -------
+    The unpickled object, or ``None`` when the file does not exist.
+    """
+    objects_dir = osp.join(Path(__file__).parent.absolute(), 'objects')
+    if not name.endswith('.pkl'):
+        name = name+".pkl"
+    filepath = osp.join(objects_dir, name)
+    if not osp.exists(filepath):
+        return None
+    # NOTE(review): pickle.load executes code embedded in the file, so this
+    # must only ever read the objects shipped with the package.
+    with open(filepath, 'rb') as f:
+        return pickle.load(f)
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/data/rec_builder.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/rec_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..e02abc969da2f882639326f5bad3c7e8d08c1fde
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/data/rec_builder.py
@@ -0,0 +1,91 @@
+import pickle
+import numpy as np
+import os
+import os.path as osp
+import sys
+import mxnet as mx
+
+
+class RecBuilder():
+    """Write images into an MXNet indexed RecordIO dataset under ``path``.
+
+    Records go to ``train.idx``/``train.rec`` as they are added; ``close()``
+    additionally writes ``train.meta`` (pickled per-image metadata) and a
+    text file named ``property``.
+    """
+
+    def __init__(self, path, image_size=(112, 112)):
+        """Create the output directory and open the record writer.
+
+        ``path`` must not exist yet (enforced with an assert).  ``image_size``
+        is only used for the ``property`` file written by ``close()``.
+        """
+        self.path = path
+        self.image_size = image_size
+        self.widx = 0        # index given to the next record
+        self.wlabel = 0      # label used by the next add() call
+        self.max_label = -1  # close() reports max_label + 1
+        assert not osp.exists(path), '%s exists' % path
+        os.makedirs(path)
+        self.writer = mx.recordio.MXIndexedRecordIO(os.path.join(path, 'train.idx'),
+                                                    os.path.join(path, 'train.rec'),
+                                                    'w')
+        self.meta = []
+
+    def _write_record(self, img, label, idlabel):
+        """Pack ``img`` under the next record index and log its metadata.
+
+        ``label`` is stored in the record header unchanged; ``idlabel`` is the
+        class id stored in the metadata entry.
+        """
+        idx = self.widx
+        header = mx.recordio.IRHeader(0, label, idx, 0)
+        if isinstance(img, np.ndarray):
+            # Arrays are encoded as JPEG; anything else is packed as given.
+            s = mx.recordio.pack_img(header,img,quality=95,img_fmt='.jpg')
+        else:
+            s = mx.recordio.pack(header, img)
+        self.writer.write_idx(idx, s)
+        self.meta.append({'image_index': idx, 'image_classes': [idlabel]})
+        self.widx += 1
+
+    def add(self, imgs):
+        """Add every image in ``imgs`` under one new, auto-assigned label.
+
+        ``imgs`` must be non-empty (enforced with an assert).
+        """
+        #!!! img should be BGR!!!!
+        assert len(imgs) > 0
+        label = self.wlabel
+        for img in imgs:
+            self._write_record(img, label, label)
+        self.max_label = label
+        self.wlabel += 1
+
+    def add_image(self, img, label):
+        """Add a single image with an explicit ``label``.
+
+        ``label`` may be a list; its first element is then used as the class
+        id for the metadata and for ``max_label``.
+        """
+        #!!! img should be BGR!!!!
+        idlabel = label[0] if isinstance(label, list) else label
+        self._write_record(img, label, idlabel)
+        self.max_label = max(self.max_label, idlabel)
+
+    def close(self):
+        """Close the record writer and write ``train.meta`` and ``property``.
+
+        ``property`` contains ``max_label + 1`` and the two ``image_size``
+        values on its first line and the record count on its second.
+        """
+        # Close the RecordIO files explicitly rather than leaving it to
+        # garbage collection of the writer object.
+        self.writer.close()
+        with open(osp.join(self.path, 'train.meta'), 'wb') as pfile:
+            pickle.dump(self.meta, pfile, protocol=pickle.HIGHEST_PROTOCOL)
+        print('stat:', self.widx, self.wlabel)
+        with open(os.path.join(self.path, 'property'), 'w') as f:
+            f.write("%d,%d,%d\n" % (self.max_label+1, self.image_size[0], self.image_size[1]))
+            f.write("%d\n" % (self.widx))
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__init__.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..225623d6142c968b4040f391039bfab88bdd1b2a
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__init__.py
@@ -0,0 +1,6 @@
+from .model_zoo import get_model
+from .arcface_onnx import ArcFaceONNX
+from .retinaface import RetinaFace
+from .scrfd import SCRFD
+from .landmark import Landmark
+from .attribute import Attribute
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..84439bd0027534962db5138e5e42e614939dadca
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2617284bfb240b557f2b2e0a4d24b0e10b36faf0
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1cebee26f3f5c4877a0ab23ee61387e3f37a7e94
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c7c56f8a154a5f74fa01375aa62dbc7c2256a32
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/arcface_onnx.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91d7bf7b98786a0a2e577e3ef758d25657514ed7
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95e2f8ae781b977e96a6f3768e64eaef111669d0
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/attribute.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f05ba7999457cdc9ef70f626a276f9778f9f17b
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f6694ae931894c1d67953c92dfbc9a2c8a6c7982
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/inswapper.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..25d27f4ad8d657d4a805e473f7176a563331d2cc
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3a28c3cef8a60ab8279b70588f04bd2331e946e
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/landmark.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e52cf2788a260a75b3881f16500b20325aa9d18
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a9ac67ef0bbb3290f54ca05dff8f04efbf01a14
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/model_zoo.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f72df6676855d2770e1bc2634b3910a3925d0e02
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b49c05d2a8261864a66cb47f98e37cdcf51fceb
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/retinaface.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d2e5ff0293a82b16eaaaab1f34aa41411cb6ad44
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e1d0c7ce2ef48607db7c212d97ca474d8738b643
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/__pycache__/scrfd.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/arcface_onnx.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/arcface_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..b537ce2ee15d4a1834d54e185f34e336aab30a77
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/arcface_onnx.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-05-04
+# @Function :
+
+from __future__ import division
+import numpy as np
+import cv2
+import onnx
+import onnxruntime
+from ..utils import face_align
+
+__all__ = [
+    'ArcFaceONNX',
+]
+
+
+class ArcFaceONNX:
+    """Face-embedding model executed through ONNX Runtime."""
+
+    def __init__(self, model_file=None, session=None):
+        """Load ``model_file`` and pick the input normalisation from its graph.
+
+        Parameters
+        ----------
+        model_file : str
+            Path of the ONNX model; required despite the ``None`` default.
+        session : optional
+            Session object to reuse.  When ``None`` an
+            ``onnxruntime.InferenceSession`` is created from ``model_file``.
+        """
+        assert model_file is not None
+        self.model_file = model_file
+        self.session = session
+        self.taskname = 'recognition'
+        # Only the names of the first 8 graph nodes are inspected.
+        head_names = [node.name for node in onnx.load(self.model_file).graph.node[:8]]
+        find_sub = any(n.startswith(('Sub', '_minus')) for n in head_names)
+        find_mul = any(n.startswith(('Mul', '_mul')) for n in head_names)
+        if find_sub and find_mul:
+            #mxnet arcface model
+            # NOTE(review): presumably such graphs normalise their own input,
+            # which is why pixels are passed through unscaled - confirm.
+            self.input_mean = 0.0
+            self.input_std = 1.0
+        else:
+            self.input_mean = 127.5
+            self.input_std = 127.5
+        #print('input mean and std:', self.input_mean, self.input_std)
+        if self.session is None:
+            self.session = onnxruntime.InferenceSession(self.model_file, None)
+        input_cfg = self.session.get_inputs()[0]
+        self.input_name = input_cfg.name
+        self.input_shape = input_cfg.shape
+        # Entries 2 and 3 of the input shape, in reversed order.
+        self.input_size = tuple(self.input_shape[2:4][::-1])
+        outputs = self.session.get_outputs()
+        self.output_names = [out.name for out in outputs]
+        assert len(self.output_names)==1
+        self.output_shape = outputs[0].shape
+
+    def prepare(self, ctx_id, **kwargs):
+        """Switch the session to the CPU provider when ``ctx_id`` is negative."""
+        if ctx_id<0:
+            self.session.set_providers(['CPUExecutionProvider'])
+
+    def get(self, img, face):
+        """Compute the embedding for ``face`` and store it as ``face.embedding``.
+
+        The face is cropped from ``img`` with ``face_align.norm_crop`` using
+        ``face.kps``; the flattened network output is stored and returned.
+        """
+        aimg = face_align.norm_crop(img, landmark=face.kps, image_size=self.input_size[0])
+        face.embedding = self.get_feat(aimg).flatten()
+        return face.embedding
+
+    def compute_sim(self, feat1, feat2):
+        """Return the cosine similarity of the two (flattened) feature arrays."""
+        from numpy.linalg import norm
+        feat1 = feat1.ravel()
+        feat2 = feat2.ravel()
+        sim = np.dot(feat1, feat2) / (norm(feat1) * norm(feat2))
+        return sim
+
+    def get_feat(self, imgs):
+        """Run the network on one image or on a list of images.
+
+        ``cv2.dnn.blobFromImages`` builds the input blob with size
+        ``self.input_size``, mean ``input_mean``, scale ``1 / input_std`` and
+        ``swapRB=True``.  Returns the first (only) network output.
+        """
+        if not isinstance(imgs, list):
+            imgs = [imgs]
+        input_size = self.input_size
+
+        blob = cv2.dnn.blobFromImages(imgs, 1.0 / self.input_std, input_size,
+                                      (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
+        return net_out
+
+    def forward(self, batch_data):
+        """Normalise ``batch_data`` with the stored mean/std and run the network."""
+        blob = (batch_data - self.input_mean) / self.input_std
+        net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
+        return net_out
+
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/attribute.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/attribute.py
new file mode 100644
index 0000000000000000000000000000000000000000..40c34de3f0995499448cf5779004cc1e5f3564fb
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/attribute.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-06-19
+# @Function :
+
+from __future__ import division
+import numpy as np
+import cv2
+import onnx
+import onnxruntime
+from ..utils import face_align
+
+__all__ = [
+    'Attribute',
+]
+
+
+class Attribute:
+    """Face-attribute model (gender/age or generic) run through ONNX Runtime."""
+
+    def __init__(self, model_file=None, session=None):
+        """Load ``model_file``, pick input normalisation and name the task.
+
+        Parameters
+        ----------
+        model_file : str
+            Path of the ONNX model; required despite the ``None`` default.
+        session : optional
+            Session object to reuse.  When ``None`` an
+            ``onnxruntime.InferenceSession`` is created from ``model_file``.
+        """
+        assert model_file is not None
+        self.model_file = model_file
+        self.session = session
+        # Only the names of the first 8 graph nodes are inspected.
+        head_names = [node.name for node in onnx.load(self.model_file).graph.node[:8]]
+        # A node called 'bn_data' among the first three counts as both.
+        has_bn_data = 'bn_data' in head_names[:3]
+        find_sub = has_bn_data or any(n.startswith(('Sub', '_minus')) for n in head_names)
+        find_mul = has_bn_data or any(n.startswith(('Mul', '_mul')) for n in head_names)
+        if find_sub and find_mul:
+            #mxnet arcface model
+            # NOTE(review): presumably such graphs normalise their own input,
+            # which is why pixels are passed through unscaled - confirm.
+            self.input_mean = 0.0
+            self.input_std = 1.0
+        else:
+            self.input_mean = 127.5
+            self.input_std = 128.0
+        #print('input mean and std:', model_file, self.input_mean, self.input_std)
+        if self.session is None:
+            self.session = onnxruntime.InferenceSession(self.model_file, None)
+        input_cfg = self.session.get_inputs()[0]
+        self.input_name = input_cfg.name
+        self.input_shape = input_cfg.shape
+        # Entries 2 and 3 of the input shape, in reversed order.
+        self.input_size = tuple(self.input_shape[2:4][::-1])
+        outputs = self.session.get_outputs()
+        self.output_names = [out.name for out in outputs]
+        assert len(self.output_names)==1
+        output_shape = outputs[0].shape
+        #print('init output_shape:', output_shape)
+        if output_shape[1]==3:
+            self.taskname = 'genderage'
+        else:
+            self.taskname = 'attribute_%d'%output_shape[1]
+
+    def prepare(self, ctx_id, **kwargs):
+        """Switch the session to the CPU provider when ``ctx_id`` is negative."""
+        if ctx_id<0:
+            self.session.set_providers(['CPUExecutionProvider'])
+
+    def get(self, img, face):
+        """Predict attributes for ``face`` from a crop of ``img``.
+
+        ``face_align.transform`` is called with the centre of ``face.bbox``
+        and the scale ``input_size[0] / (1.5 * max(w, h))``.  For the
+        ``genderage`` task, gender (argmax of the first two outputs) and age
+        (third output times 100, rounded) are stored on ``face`` and returned
+        as a tuple; any other task returns the raw prediction vector.
+        """
+        bbox = face.bbox
+        w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
+        center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
+        rotate = 0
+        _scale = self.input_size[0] / (max(w, h)*1.5)
+        #print('param:', img.shape, bbox, center, self.input_size, _scale, rotate)
+        aimg, M = face_align.transform(img, center, self.input_size[0], _scale, rotate)
+        input_size = tuple(aimg.shape[0:2][::-1])
+        #assert input_size==self.input_size
+        blob = cv2.dnn.blobFromImage(aimg, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        pred = self.session.run(self.output_names, {self.input_name : blob})[0][0]
+        if self.taskname=='genderage':
+            assert len(pred)==3
+            gender = np.argmax(pred[:2])
+            age = int(np.round(pred[2]*100))
+            face['gender'] = gender
+            face['age'] = age
+            return gender, age
+        else:
+            return pred
+
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/inswapper.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/inswapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f321c627ee66cceddcab98b561b997441dd4f768
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/inswapper.py
@@ -0,0 +1,135 @@
+import time
+import numpy as np
+import onnxruntime
+import cv2
+import onnx
+from onnx import numpy_helper
+from ..utils import face_align
+
+
+class INSwapper():
+    """Face-swapping model run through ONNX Runtime."""
+
+    def __init__(self, model_file=None, session=None):
+        """Load the model and cache its input/output names and input size.
+
+        ``self.emap`` is read from the last initializer of the ONNX graph;
+        ``get`` multiplies the source embedding by it.  A new
+        ``onnxruntime.InferenceSession`` is created when ``session`` is None.
+        """
+        self.model_file = model_file
+        self.session = session
+        model = onnx.load(self.model_file)
+        graph = model.graph
+        self.emap = numpy_helper.to_array(graph.initializer[-1])
+        self.input_mean = 0.0
+        self.input_std = 255.0
+        #print('input mean and std:', model_file, self.input_mean, self.input_std)
+        if self.session is None:
+            self.session = onnxruntime.InferenceSession(self.model_file, None)
+        inputs = self.session.get_inputs()
+        self.input_names = [inp.name for inp in inputs]
+        outputs = self.session.get_outputs()
+        self.output_names = [out.name for out in outputs]
+        assert len(self.output_names)==1
+        self.input_shape = inputs[0].shape
+        # print('inswapper-shape:', self.input_shape)
+        # Entries 2 and 3 of the first input's shape, in reversed order.
+        self.input_size = tuple(self.input_shape[2:4][::-1])
+
+    def forward(self, img, latent):
+        """Normalise ``img`` with the stored mean/std and run the network.
+
+        ``img`` feeds the first model input and ``latent`` the second.
+        """
+        img = (img - self.input_mean) / self.input_std
+        pred = self.session.run(self.output_names, {self.input_names[0]: img, self.input_names[1]: latent})[0]
+        return pred
+
+    def get(self, img, target_face, source_face, paste_back=True):
+        """Swap the identity of ``source_face`` onto ``target_face`` in ``img``.
+
+        Parameters
+        ----------
+        img : array
+            Image that contains ``target_face``.
+        target_face
+            Face object providing ``kps`` and ``landmark_2d_106``.
+        source_face
+            Face object providing ``normed_embedding``.
+        paste_back : bool
+            When false, return ``(bgr_fake, M)``: the generated crop and the
+            matrix returned by ``face_align.norm_crop2``.  When true, blend
+            the crop with ``img`` and return the merged uint8 image.
+        """
+        # Polygon over a fixed set of the 106 landmarks, filled with 1.
+        face_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+        cv2.fillPoly(face_mask, np.array([target_face.landmark_2d_106[[1,9,10,11,12,13,14,15,16,2,3,4,5,6,7,8,0,24,23,22,21,20,19,18,32,31,30,29,28,27,26,25,17,101,105,104,103,51,49,48,43]].astype('int64')]), 1)
+        aimg, M = face_align.norm_crop2(img, target_face.kps, self.input_size[0])
+        blob = cv2.dnn.blobFromImage(aimg, 1.0 / self.input_std, self.input_size,
+                                     (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        # Project the source embedding with emap and re-normalise it.
+        latent = source_face.normed_embedding.reshape((1,-1))
+        latent = np.dot(latent, self.emap)
+        latent /= np.linalg.norm(latent)
+        pred = self.session.run(self.output_names, {self.input_names[0]: blob, self.input_names[1]: latent})[0]
+        #print(latent.shape, latent.dtype, pred.shape)
+        img_fake = pred.transpose((0,2,3,1))[0]
+        bgr_fake = np.clip(255 * img_fake, 0, 255).astype(np.uint8)[:,:,::-1]
+        if not paste_back:
+            return bgr_fake, M
+        else:
+            target_img = img
+            # Per-pixel mean absolute change between generated and aligned
+            # crop; a 2-pixel border is zeroed.
+            fake_diff = bgr_fake.astype(np.float32) - aimg.astype(np.float32)
+            fake_diff = np.abs(fake_diff).mean(axis=2)
+            fake_diff[:2,:] = 0
+            fake_diff[-2:,:] = 0
+            fake_diff[:,:2] = 0
+            fake_diff[:,-2:] = 0
+            # Warp crop, coverage mask and difference map back with the
+            # inverse of M, at the size of the target image.
+            IM = cv2.invertAffineTransform(M)
+            img_white = np.full((aimg.shape[0],aimg.shape[1]), 255, dtype=np.float32)
+            bgr_fake = cv2.warpAffine(bgr_fake, IM, (target_img.shape[1], target_img.shape[0]), borderValue=0.0)
+            img_white = cv2.warpAffine(img_white, IM, (target_img.shape[1], target_img.shape[0]), borderValue=0.0)
+            fake_diff = cv2.warpAffine(fake_diff, IM, (target_img.shape[1], target_img.shape[0]), borderValue=0.0)
+            img_white[img_white>20] = 255
+            # Binarise the difference map at fthresh.
+            fthresh = 10
+            fake_diff[fake_diff<fthresh] = 0
+            fake_diff[fake_diff>=fthresh] = 255
+            img_mask = img_white
+            mask_h_inds, mask_w_inds = np.where(img_mask==255)
+            mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
+            mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
+            mask_size = int(np.sqrt(mask_h*mask_w))
+            k = max(mask_size//10, 10)
+            #k = max(mask_size//20, 6)
+            #k = 6
+            kernel = np.ones((k,k),np.uint8)
+            img_mask = cv2.erode(img_mask,kernel,iterations = 1)
+            kernel = np.ones((2,2),np.uint8)
+            fake_diff = cv2.dilate(fake_diff,kernel,iterations = 1)
+
+            # Force the eroded landmark polygon to count as changed.
+            face_mask = cv2.erode(face_mask,np.ones((11,11),np.uint8),iterations = 1)
+            fake_diff[face_mask==1] = 255
+
+            k = max(mask_size//20, 5)
+            #k = 3
+            kernel_size = (k, k)
+            blur_size = tuple(2*i+1 for i in kernel_size)
+            img_mask = cv2.GaussianBlur(img_mask, blur_size, 0)
+            fake_diff = cv2.blur(fake_diff, (11,11), 0)
+            ##fake_diff = cv2.GaussianBlur(fake_diff, blur_size, 0)
+            img_mask /= 255
+            fake_diff /= 255
+            # img_mask = fake_diff
+            # Final alpha is the product of both soft masks.
+            img_mask = img_mask*fake_diff
+            img_mask = np.reshape(img_mask, [img_mask.shape[0],img_mask.shape[1],1])
+            fake_merged = img_mask * bgr_fake + (1-img_mask) * target_img.astype(np.float32)
+            fake_merged = fake_merged.astype(np.uint8)
+            return fake_merged
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/landmark.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/landmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..598b4b29a2d0674d8bb25b681f921c61460d101c
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/landmark.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-05-04
+# @Function :
+
+from __future__ import division
+import numpy as np
+import cv2
+import onnx
+import onnxruntime
+from ..utils import face_align
+from ..utils import transform
+from ..data import get_object
+
+__all__ = [
+    'Landmark',
+]
+
+
+class Landmark:
+    """Facial-landmark model run through ONNX Runtime."""
+
+    def __init__(self, model_file=None, session=None):
+        """Load ``model_file`` and derive landmark layout and task name.
+
+        An output width of 3309 selects 68 three-dimensional landmarks plus
+        pose estimation (using the ``meanshape_68.pkl`` object); otherwise
+        landmarks are two-dimensional and their number is half the output
+        width.  A new ``onnxruntime.InferenceSession`` is created when
+        ``session`` is ``None``.
+        """
+        assert model_file is not None
+        self.model_file = model_file
+        self.session = session
+        # Only the names of the first 8 graph nodes are inspected.
+        head_names = [node.name for node in onnx.load(self.model_file).graph.node[:8]]
+        # A node called 'bn_data' among the first three counts as both.
+        has_bn_data = 'bn_data' in head_names[:3]
+        find_sub = has_bn_data or any(n.startswith(('Sub', '_minus')) for n in head_names)
+        find_mul = has_bn_data or any(n.startswith(('Mul', '_mul')) for n in head_names)
+        if find_sub and find_mul:
+            #mxnet arcface model
+            # NOTE(review): presumably such graphs normalise their own input,
+            # which is why pixels are passed through unscaled - confirm.
+            self.input_mean = 0.0
+            self.input_std = 1.0
+        else:
+            self.input_mean = 127.5
+            self.input_std = 128.0
+        #print('input mean and std:', model_file, self.input_mean, self.input_std)
+        if self.session is None:
+            self.session = onnxruntime.InferenceSession(self.model_file, None)
+        input_cfg = self.session.get_inputs()[0]
+        self.input_name = input_cfg.name
+        self.input_shape = input_cfg.shape
+        # Entries 2 and 3 of the input shape, in reversed order.
+        self.input_size = tuple(self.input_shape[2:4][::-1])
+        outputs = self.session.get_outputs()
+        self.output_names = [out.name for out in outputs]
+        assert len(self.output_names)==1
+        output_shape = outputs[0].shape
+        self.require_pose = False
+        #print('init output_shape:', output_shape)
+        if output_shape[1]==3309:
+            self.lmk_dim = 3
+            self.lmk_num = 68
+            self.mean_lmk = get_object('meanshape_68.pkl')
+            self.require_pose = True
+        else:
+            self.lmk_dim = 2
+            self.lmk_num = output_shape[1]//self.lmk_dim
+        self.taskname = 'landmark_%dd_%d'%(self.lmk_dim, self.lmk_num)
+
+    def prepare(self, ctx_id, **kwargs):
+        """Switch the session to the CPU provider when ``ctx_id`` is negative."""
+        if ctx_id<0:
+            self.session.set_providers(['CPUExecutionProvider'])
+
+    def get(self, img, face):
+        """Predict landmarks for ``face`` and store them on the face.
+
+        The prediction is transformed with the inverse of the crop matrix
+        ``M`` and saved under ``face[self.taskname]``.  When pose is
+        required, ``face['pose']`` is set to the float32 array
+        ``[rx, ry, rz]`` (pitch, yaw, roll per the original comment).
+        Returns the landmark array.
+        """
+        bbox = face.bbox
+        w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
+        center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
+        rotate = 0
+        _scale = self.input_size[0] / (max(w, h)*1.5)
+        #print('param:', img.shape, bbox, center, self.input_size, _scale, rotate)
+        aimg, M = face_align.transform(img, center, self.input_size[0], _scale, rotate)
+        input_size = tuple(aimg.shape[0:2][::-1])
+        #assert input_size==self.input_size
+        blob = cv2.dnn.blobFromImage(aimg, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        pred = self.session.run(self.output_names, {self.input_name : blob})[0][0]
+        # Outputs of length >= 3000 are read as triples, shorter ones as pairs.
+        if pred.shape[0] >= 3000:
+            pred = pred.reshape((-1, 3))
+        else:
+            pred = pred.reshape((-1, 2))
+        if self.lmk_num < pred.shape[0]:
+            # Keep only the last lmk_num rows.
+            pred = pred[self.lmk_num*-1:,:]
+        # Shift by 1, then scale by half the input width.
+        pred[:, 0:2] += 1
+        pred[:, 0:2] *= (self.input_size[0] // 2)
+        if pred.shape[1] == 3:
+            pred[:, 2] *= (self.input_size[0] // 2)
+
+        IM = cv2.invertAffineTransform(M)
+        pred = face_align.trans_points(pred, IM)
+        face[self.taskname] = pred
+        if self.require_pose:
+            P = transform.estimate_affine_matrix_3d23d(self.mean_lmk, pred)
+            s, R, t = transform.P2sRt(P)
+            rx, ry, rz = transform.matrix2angle(R)
+            pose = np.array( [rx, ry, rz], dtype=np.float32 )
+            face['pose'] = pose #pitch, yaw, roll
+        return pred
+
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_store.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_store.py
new file mode 100644
index 0000000000000000000000000000000000000000..50bb85d314f5b7a0ea8211d2cd21186e32791592
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_store.py
@@ -0,0 +1,103 @@
+"""
+This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_store.py
+"""
+from __future__ import print_function
+
+__all__ = ['get_model_file']
+import os
+import zipfile
+import glob
+
+from ..utils import download, check_sha1
+
# Expected SHA-1 checksum for each downloadable legacy model, keyed by name.
# An empty checksum means no reference hash is recorded for that model.
_model_sha1 = {
    'arcface_r100_v1': '95be21b58e29e9c1237f229dae534bd854009ce0',
    'arcface_mfn_v1': '',
    'retinaface_r50_v1': '39fd1e087a2a2ed70a154ac01fecaa86c315d01b',
    'retinaface_mnet025_v1': '2c9de8116d1f448fd1d4661f90308faae34c990a',
    'retinaface_mnet025_v2': '0db1d07921d005e6c9a5b38e059452fc5645e5a4',
    'genderage_v1': '7dd8111652b7aac2490c5dcddeb268e53ac643e6',
}

# Base location of the model archives and the template for one archive URL.
base_repo_url = 'https://insightface.ai/files/'
_url_format = '{repo_url}models/{file_name}.zip'
+
+
def short_hash(name):
    """Return the first 8 characters of the recorded SHA-1 for ``name``.

    Raises
    ------
    ValueError
        If ``name`` has no entry in the checksum table.
    """
    checksum = _model_sha1.get(name)
    if checksum is None:
        raise ValueError(
            'Pretrained model for {name} is not available.'.format(name=name))
    return checksum[:8]
+
+
def find_params_file(dir_path):
    """Return the lexicographically last ``*.params`` file in ``dir_path``.

    Parameters
    ----------
    dir_path : str
        Directory to search (not recursive).

    Returns
    -------
    str or None
        Path of the matching file that sorts last, or ``None`` when the
        directory does not exist or contains no ``.params`` file.
    """
    if not os.path.exists(dir_path):
        return None
    # Escape the directory part so glob metacharacters in the path itself
    # ('[', ']', '*', '?') are matched literally instead of as a pattern.
    pattern = os.path.join(glob.escape(dir_path), '*.params')
    paths = glob.glob(pattern)
    if not paths:
        return None
    return max(paths)
+
+
def get_model_file(name, root=os.path.join('~', '.insightface', 'models')):
    r"""Return the local path of a pretrained ``.params`` model file.

    The file is looked up under ``<root>/<name>/``.  When it is missing or its
    SHA-1 does not match the recorded checksum, the model archive is
    downloaded, extracted into that directory and verified again.
    Directories are created as needed.

    Parameters
    ----------
    name : str
        Name of the model; must be a key of the checksum table.
    root : str, default '~/.insightface/models'
        Location for keeping the model parameters.

    Returns
    -------
    file_path
        Path to the requested pretrained model file.

    Raises
    ------
    KeyError
        If ``name`` has no recorded checksum.
    ValueError
        If the downloaded archive contains no ``.params`` file or the file's
        hash does not match.
    """

    file_name = name
    root = os.path.expanduser(root)
    dir_path = os.path.join(root, name)
    file_path = find_params_file(dir_path)
    sha1_hash = _model_sha1[name]
    if file_path is not None:
        if check_sha1(file_path, sha1_hash):
            return file_path
        print(
            'Mismatch in the content of model file detected. Downloading again.'
        )
    else:
        print('Model file is not found. Downloading.')

    # dir_path lives under root, so this creates both; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(dir_path, exist_ok=True)

    zip_file_path = os.path.join(root, file_name + '.zip')
    repo_url = base_repo_url
    if repo_url[-1] != '/':
        repo_url = repo_url + '/'
    # NOTE(review): confirm the `download` helper imported by this module
    # accepts (url, path=..., overwrite=...).
    download(_url_format.format(repo_url=repo_url, file_name=file_name),
             path=zip_file_path,
             overwrite=True)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(dir_path)
    os.remove(zip_file_path)
    file_path = find_params_file(dir_path)

    # An archive without any .params file would otherwise reach check_sha1
    # with None and fail with an unrelated error.
    if file_path is None:
        raise ValueError(
            'Downloaded archive for {name} contains no .params file.'.format(
                name=name))

    if check_sha1(file_path, sha1_hash):
        return file_path
    raise ValueError(
        'Downloaded file has different hash. Please try again.')
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_zoo.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_zoo.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8366e2a5461d5d6688f23e102a40944330084a4
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/model_zoo.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-05-04
+# @Function :
+
+import os
+import os.path as osp
+import glob
+import onnxruntime
+from .arcface_onnx import *
+from .retinaface import *
+#from .scrfd import *
+from .landmark import *
+from .attribute import Attribute
+from .inswapper import INSwapper
+from ..utils import download_onnx
+
+__all__ = ['get_model']
+
+
class PickableInferenceSession(onnxruntime.InferenceSession):
    """``InferenceSession`` subclass that can be pickled.

    Only the model path is serialized; unpickling re-runs ``__init__`` with
    that path alone, so keyword arguments given at construction time are not
    carried across a pickle round trip.
    """

    def __init__(self, model_path, **kwargs):
        super().__init__(model_path, **kwargs)
        # Remembered so the session can be rebuilt in __setstate__.
        self.model_path = model_path

    def __getstate__(self):
        return dict(model_path=self.model_path)

    def __setstate__(self, values):
        self.__init__(values['model_path'])
+
class ModelRouter:
    """Pick the wrapper class for an ONNX file from its input/output layout."""

    def __init__(self, onnx_file):
        self.onnx_file = onnx_file

    def get_model(self, **kwargs):
        """Open the ONNX file and return the matching model wrapper.

        ``kwargs`` are forwarded to the inference session constructor.
        Rules are tried in order: output count first, then the spatial size
        of the first input.  Returns ``None`` when no rule matches.
        """
        session = PickableInferenceSession(self.onnx_file, **kwargs)
        model_inputs = session.get_inputs()
        shape = model_inputs[0].shape
        model_outputs = session.get_outputs()

        # Five or more outputs: treated as a detection model.
        if len(model_outputs) >= 5:
            return RetinaFace(model_file=self.onnx_file, session=session)

        dim_h, dim_w = shape[2], shape[3]
        if dim_h == 192 and dim_w == 192:
            return Landmark(model_file=self.onnx_file, session=session)
        if dim_h == 96 and dim_w == 96:
            return Attribute(model_file=self.onnx_file, session=session)
        if len(model_inputs) == 2 and dim_h == 128 and dim_w == 128:
            return INSwapper(model_file=self.onnx_file, session=session)
        # Square input of at least 112 and a multiple of 16: recognition model.
        if dim_h == dim_w and dim_h >= 112 and dim_h % 16 == 0:
            return ArcFaceONNX(model_file=self.onnx_file, session=session)
        return None
+
def find_onnx_file(dir_path):
    """Return the lexicographically last ``*.onnx`` file in ``dir_path``.

    Returns ``None`` when the directory does not exist or holds no ``.onnx``
    file.  The search is not recursive.
    """
    if not os.path.exists(dir_path):
        return None
    # Escape the directory part so glob metacharacters in the path itself
    # ('[', ']', '*', '?') are matched literally instead of as a pattern.
    pattern = os.path.join(glob.escape(dir_path), '*.onnx')
    paths = glob.glob(pattern)
    if not paths:
        return None
    return max(paths)
+
def get_default_providers():
    """Return a new list of execution provider names, CUDA first, then CPU."""
    return list(('CUDAExecutionProvider', 'CPUExecutionProvider'))
+
def get_default_provider_options():
    """Return the default provider options, which is ``None``."""
    default_options = None
    return default_options
+
def get_model(name, **kwargs):
    """Load a model wrapper by pack-relative name or by ``.onnx`` path.

    Recognised keyword arguments: ``root`` (default ``'~/.insightface'``),
    ``download`` and ``download_zip`` (both default ``False``), ``providers``
    and ``provider_options``.

    A ``name`` not ending in ``.onnx`` is resolved to the last ``.onnx`` file
    in ``<root>/models/<name>``; ``None`` is returned if there is none.
    Otherwise ``name`` is used as the file path and may be downloaded when
    ``download`` is true.  The return value is whatever the model router
    yields for the file, which can itself be ``None``.
    """
    root = os.path.expanduser(kwargs.get('root', '~/.insightface'))
    model_root = osp.join(root, 'models')
    allow_download = kwargs.get('download', False)
    download_zip = kwargs.get('download_zip', False)

    if name.endswith('.onnx'):
        model_file = name
    else:
        model_file = find_onnx_file(os.path.join(model_root, name))
        if model_file is None:
            return None

    if allow_download and not osp.exists(model_file):
        model_file = download_onnx('models', model_file, root=root, download_zip=download_zip)
    assert osp.exists(model_file), 'model_file %s should exist'%model_file
    assert osp.isfile(model_file), 'model_file %s should be a file'%model_file

    router = ModelRouter(model_file)
    providers = kwargs.get('providers', get_default_providers())
    provider_options = kwargs.get('provider_options', get_default_provider_options())
    return router.get_model(providers=providers, provider_options=provider_options)
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/retinaface.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/retinaface.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc4ad91ed70688b38503127137e928dc7e5433e1
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/retinaface.py
@@ -0,0 +1,301 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-09-18
+# @Function :
+
+from __future__ import division
+import datetime
+import numpy as np
+import onnx
+import onnxruntime
+import os
+import os.path as osp
+import cv2
+import sys
+
def softmax(z):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiation so large inputs do
    not overflow.  Asserts that ``z`` is two-dimensional.
    """
    assert len(z.shape) == 2
    row_max = np.max(z, axis=1)[:, np.newaxis]  # column vector for broadcasting
    exps = np.exp(z - row_max)
    row_sum = np.sum(exps, axis=1)[:, np.newaxis]
    return exps / row_sum
+
def distance2bbox(points, distance, max_shape=None):
    """Decode distance prediction to bounding box.

    Args:
        points (ndarray): Shape (n, 2), [x, y].
        distance (ndarray): Shape (n, 4), distance from the given point to
            the 4 boundaries (left, top, right, bottom).
        max_shape (tuple): Optional (height, width) of the image; when given,
            coordinates are clipped to ``[0, width]`` / ``[0, height]``.

    Returns:
        ndarray: Decoded bboxes of shape (n, 4) as [x1, y1, x2, y2].
    """
    x1 = points[:, 0] - distance[:, 0]
    y1 = points[:, 1] - distance[:, 1]
    x2 = points[:, 0] + distance[:, 2]
    y2 = points[:, 1] + distance[:, 3]
    if max_shape is not None:
        # NumPy arrays have no .clamp(); np.clip is the equivalent.
        x1 = np.clip(x1, 0, max_shape[1])
        y1 = np.clip(y1, 0, max_shape[0])
        x2 = np.clip(x2, 0, max_shape[1])
        y2 = np.clip(y2, 0, max_shape[0])
    return np.stack([x1, y1, x2, y2], axis=-1)
+
def distance2kps(points, distance, max_shape=None):
    """Decode per-point offsets into keypoint coordinates.

    Args:
        points (ndarray): Shape (n, 2), [x, y].
        distance (ndarray): Shape (n, 2*k), interleaved (dx, dy) offsets of
            k keypoints relative to each point.
        max_shape (tuple): Optional (height, width) of the image; when given,
            coordinates are clipped to ``[0, width]`` / ``[0, height]``.

    Returns:
        ndarray: Shape (n, 2*k), interleaved absolute (x, y) coordinates.
    """
    preds = []
    # i is always even, so the x offset pairs with points[:, 0] and the y
    # offset with points[:, 1].
    for i in range(0, distance.shape[1], 2):
        px = points[:, 0] + distance[:, i]
        py = points[:, 1] + distance[:, i+1]
        if max_shape is not None:
            # NumPy arrays have no .clamp(); np.clip is the equivalent.
            px = np.clip(px, 0, max_shape[1])
            py = np.clip(py, 0, max_shape[0])
        preds.append(px)
        preds.append(py)
    return np.stack(preds, axis=-1)
+
class RetinaFace:
    """Face detector that runs an ONNX model through onnxruntime.

    The model's outputs are decoded with per-stride anchor centres into
    bounding boxes (and optionally keypoints), then filtered with NMS.
    """

    def __init__(self, model_file=None, session=None):
        """Wrap an existing session, or create one from ``model_file``."""
        import onnxruntime
        self.model_file = model_file
        self.session = session
        self.taskname = 'detection'
        if self.session is None:
            assert self.model_file is not None
            assert osp.exists(self.model_file)
            self.session = onnxruntime.InferenceSession(self.model_file, None)
        # Anchor centres cached by (height, width, stride).
        self.center_cache = {}
        self.nms_thresh = 0.4
        self.det_thresh = 0.5
        self._init_vars()

    def _init_vars(self):
        """Read input/output metadata from the session and derive the layout."""
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        # A string in the spatial dimension means no fixed input size is
        # recorded here; it can then be supplied via prepare()/detect().
        if isinstance(input_shape[2], str):
            self.input_size = None
        else:
            self.input_size = tuple(input_shape[2:4][::-1])
        input_name = input_cfg.name
        self.input_shape = input_shape
        outputs = self.session.get_outputs()
        output_names = []
        for o in outputs:
            output_names.append(o.name)
        self.input_name = input_name
        self.output_names = output_names
        self.input_mean = 127.5
        self.input_std = 128.0
        self.use_kps = False
        self._anchor_ratio = 1.0
        self._num_anchors = 1
        # The number of outputs selects the number of feature-map strides
        # (fmc), anchors per location and whether keypoints are present.
        # For any other output count, fmc and _feat_stride_fpn stay unset.
        if len(outputs)==6:
            self.fmc = 3
            self._feat_stride_fpn = [8, 16, 32]
            self._num_anchors = 2
        elif len(outputs)==9:
            self.fmc = 3
            self._feat_stride_fpn = [8, 16, 32]
            self._num_anchors = 2
            self.use_kps = True
        elif len(outputs)==10:
            self.fmc = 5
            self._feat_stride_fpn = [8, 16, 32, 64, 128]
            self._num_anchors = 1
        elif len(outputs)==15:
            self.fmc = 5
            self._feat_stride_fpn = [8, 16, 32, 64, 128]
            self._num_anchors = 1
            self.use_kps = True

    def prepare(self, ctx_id, **kwargs):
        """Configure provider and thresholds.

        A negative ``ctx_id`` selects the CPU provider.  Optional keyword
        arguments ``nms_thresh``, ``det_thresh`` and ``input_size`` override
        the defaults; ``input_size`` is ignored (with a printed warning) when
        the model already fixes one.
        """
        if ctx_id<0:
            self.session.set_providers(['CPUExecutionProvider'])
        nms_thresh = kwargs.get('nms_thresh', None)
        if nms_thresh is not None:
            self.nms_thresh = nms_thresh
        det_thresh = kwargs.get('det_thresh', None)
        if det_thresh is not None:
            self.det_thresh = det_thresh
        input_size = kwargs.get('input_size', None)
        if input_size is not None:
            if self.input_size is not None:
                print('warning: det_size is already set in detection model, ignore')
            else:
                self.input_size = input_size

    def forward(self, img, threshold):
        """Run the network on ``img`` and decode candidates per stride.

        Returns three lists with one entry per stride: scores, boxes and
        (when the model has keypoints) keypoints of all candidates whose
        score is at least ``threshold``.
        """
        scores_list = []
        bboxes_list = []
        kpss_list = []
        input_size = tuple(img.shape[0:2][::-1])
        blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
        net_outs = self.session.run(self.output_names, {self.input_name : blob})

        input_height = blob.shape[2]
        input_width = blob.shape[3]
        fmc = self.fmc
        # Outputs are grouped as [scores]*fmc, [bbox]*fmc, [kps]*fmc.
        for idx, stride in enumerate(self._feat_stride_fpn):
            scores = net_outs[idx]
            bbox_preds = net_outs[idx+fmc]
            bbox_preds = bbox_preds * stride
            if self.use_kps:
                kps_preds = net_outs[idx+fmc*2] * stride
            height = input_height // stride
            width = input_width // stride
            K = height * width
            key = (height, width, stride)
            if key in self.center_cache:
                anchor_centers = self.center_cache[key]
            else:
                # Grid of (x, y) cell indices, scaled to input pixels and
                # flattened; repeated once per anchor at each location.
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)

                anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
                if self._num_anchors>1:
                    anchor_centers = np.stack([anchor_centers]*self._num_anchors, axis=1).reshape( (-1,2) )
                # Cap the cache at 100 distinct grids.
                if len(self.center_cache)<100:
                    self.center_cache[key] = anchor_centers

            pos_inds = np.where(scores>=threshold)[0]
            bboxes = distance2bbox(anchor_centers, bbox_preds)
            pos_scores = scores[pos_inds]
            pos_bboxes = bboxes[pos_inds]
            scores_list.append(pos_scores)
            bboxes_list.append(pos_bboxes)
            if self.use_kps:
                kpss = distance2kps(anchor_centers, kps_preds)
                kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
                pos_kpss = kpss[pos_inds]
                kpss_list.append(pos_kpss)
        return scores_list, bboxes_list, kpss_list

    def detect(self, img, input_size = None, max_num=0, metric='default'):
        """Detect faces in ``img``.

        ``input_size`` is (width, height) of the network input and falls back
        to ``self.input_size``; one of the two must be set.  When ``max_num``
        is positive and more faces survive NMS, the top ``max_num`` are kept,
        ranked by area (``metric='max'``) or by area minus twice the squared
        distance from the image centre (any other ``metric``).

        Returns ``(det, kpss)``: ``det`` has one row per face
        ``[x1, y1, x2, y2, score]`` in original image coordinates; ``kpss``
        is the matching keypoint array or ``None`` without keypoints.
        """
        assert input_size is not None or self.input_size is not None
        input_size = self.input_size if input_size is None else input_size

        # Resize preserving aspect ratio, then pad into the top-left corner
        # of a zero image of the network input size.
        im_ratio = float(img.shape[0]) / img.shape[1]
        model_ratio = float(input_size[1]) / input_size[0]
        if im_ratio>model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = float(new_height) / img.shape[0]
        resized_img = cv2.resize(img, (new_width, new_height))
        det_img = np.zeros( (input_size[1], input_size[0], 3), dtype=np.uint8 )
        det_img[:new_height, :new_width, :] = resized_img

        scores_list, bboxes_list, kpss_list = self.forward(det_img, self.det_thresh)

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        # Undo the resize so coordinates refer to the original image.
        bboxes = np.vstack(bboxes_list) / det_scale
        if self.use_kps:
            kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]
        keep = self.nms(pre_det)
        det = pre_det[keep, :]
        if self.use_kps:
            kpss = kpss[order,:,:]
            kpss = kpss[keep,:,:]
        else:
            kpss = None
        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                    det[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            if metric=='max':
                values = area
            else:
                values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(
                values)[::-1]  # largest ranking value first
            bindex = bindex[0:max_num]
            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]
        return det, kpss

    def nms(self, dets):
        """Greedy non-maximum suppression.

        ``dets`` has rows ``[x1, y1, x2, y2, score]``.  Returns the list of
        row indices to keep, highest score first; a box is dropped when its
        IoU with an already kept box exceeds ``self.nms_thresh``.
        """
        thresh = self.nms_thresh
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        # Widths/heights use the inclusive-pixel (+1) convention.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            # inds index into order[1:], hence the +1 when re-indexing order.
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep
+
def get_retinaface(name, download=False, root='~/.insightface/models', **kwargs):
    """Build a ``RetinaFace`` detector.

    With ``download=False`` (default), ``name`` is the path of an existing
    model file.  With ``download=True``, ``name`` is a model suffix and the
    file for ``"retinaface_<name>"`` is fetched via the model store under
    ``root``.  Extra keyword arguments are accepted and ignored.
    """
    if not download:
        assert os.path.exists(name)
        return RetinaFace(name)
    else:
        from .model_store import get_model_file
        _file = get_model_file("retinaface_%s" % name, root=root)
        # The class is RetinaFace; the lowercase name is not defined here.
        return RetinaFace(_file)
+
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/scrfd.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/scrfd.py
new file mode 100644
index 0000000000000000000000000000000000000000..674db4bba761157592dfb95c5d1638da1099f89c
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/model_zoo/scrfd.py
@@ -0,0 +1,348 @@
+# -*- coding: utf-8 -*-
+# @Organization : insightface.ai
+# @Author : Jia Guo
+# @Time : 2021-05-04
+# @Function :
+
+from __future__ import division
+import datetime
+import numpy as np
+import onnx
+import onnxruntime
+import os
+import os.path as osp
+import cv2
+import sys
+
def softmax(z):
    """Numerically stable softmax applied to every row of a 2-D array.

    Asserts that ``z`` has exactly two dimensions.
    """
    assert len(z.shape) == 2
    peak = np.max(z, axis=1)
    exponentials = np.exp(z - peak[:, np.newaxis])  # broadcast per row
    totals = np.sum(exponentials, axis=1)
    return exponentials / totals[:, np.newaxis]
+
def distance2bbox(points, distance, max_shape=None):
    """Decode distance prediction to bounding box.

    Args:
        points (ndarray): Shape (n, 2), [x, y].
        distance (ndarray): Shape (n, 4), distance from the given point to
            the 4 boundaries (left, top, right, bottom).
        max_shape (tuple): Optional (height, width) of the image; when given,
            coordinates are clipped to ``[0, width]`` / ``[0, height]``.

    Returns:
        ndarray: Decoded bboxes of shape (n, 4) as [x1, y1, x2, y2].
    """
    x1 = points[:, 0] - distance[:, 0]
    y1 = points[:, 1] - distance[:, 1]
    x2 = points[:, 0] + distance[:, 2]
    y2 = points[:, 1] + distance[:, 3]
    if max_shape is not None:
        # NumPy arrays have no .clamp(); np.clip is the equivalent.
        x1 = np.clip(x1, 0, max_shape[1])
        y1 = np.clip(y1, 0, max_shape[0])
        x2 = np.clip(x2, 0, max_shape[1])
        y2 = np.clip(y2, 0, max_shape[0])
    return np.stack([x1, y1, x2, y2], axis=-1)
+
def distance2kps(points, distance, max_shape=None):
    """Decode per-point offsets into keypoint coordinates.

    Args:
        points (ndarray): Shape (n, 2), [x, y].
        distance (ndarray): Shape (n, 2*k), interleaved (dx, dy) offsets of
            k keypoints relative to each point.
        max_shape (tuple): Optional (height, width) of the image; when given,
            coordinates are clipped to ``[0, width]`` / ``[0, height]``.

    Returns:
        ndarray: Shape (n, 2*k), interleaved absolute (x, y) coordinates.
    """
    preds = []
    # i is always even, so the x offset pairs with points[:, 0] and the y
    # offset with points[:, 1].
    for i in range(0, distance.shape[1], 2):
        px = points[:, 0] + distance[:, i]
        py = points[:, 1] + distance[:, i+1]
        if max_shape is not None:
            # NumPy arrays have no .clamp(); np.clip is the equivalent.
            px = np.clip(px, 0, max_shape[1])
            py = np.clip(py, 0, max_shape[0])
        preds.append(px)
        preds.append(py)
    return np.stack(preds, axis=-1)
+
class SCRFD:
    """Face detector that runs an ONNX model through onnxruntime.

    The model's outputs are decoded with per-stride anchor centres into
    bounding boxes (and optionally keypoints), then filtered with NMS.
    Outputs with a leading batch dimension are supported by taking the first
    batch element.
    """

    def __init__(self, model_file=None, session=None):
        """Wrap an existing session, or create one from ``model_file``."""
        import onnxruntime
        self.model_file = model_file
        self.session = session
        self.taskname = 'detection'
        self.batched = False
        if self.session is None:
            assert self.model_file is not None
            assert osp.exists(self.model_file)
            self.session = onnxruntime.InferenceSession(self.model_file, None)
        # Anchor centres cached by (height, width, stride).
        self.center_cache = {}
        self.nms_thresh = 0.4
        self.det_thresh = 0.5
        self._init_vars()

    def _init_vars(self):
        """Read input/output metadata from the session and derive the layout."""
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        # A string in the spatial dimension means no fixed input size is
        # recorded here; it can then be supplied via prepare()/detect().
        if isinstance(input_shape[2], str):
            self.input_size = None
        else:
            self.input_size = tuple(input_shape[2:4][::-1])
        input_name = input_cfg.name
        self.input_shape = input_shape
        outputs = self.session.get_outputs()
        # A rank-3 first output is taken to mean the outputs carry a batch
        # dimension.
        if len(outputs[0].shape) == 3:
            self.batched = True
        output_names = []
        for o in outputs:
            output_names.append(o.name)
        self.input_name = input_name
        self.output_names = output_names
        self.input_mean = 127.5
        self.input_std = 128.0
        self.use_kps = False
        self._anchor_ratio = 1.0
        self._num_anchors = 1
        # The number of outputs selects the number of feature-map strides
        # (fmc), anchors per location and whether keypoints are present.
        # For any other output count, fmc and _feat_stride_fpn stay unset.
        if len(outputs)==6:
            self.fmc = 3
            self._feat_stride_fpn = [8, 16, 32]
            self._num_anchors = 2
        elif len(outputs)==9:
            self.fmc = 3
            self._feat_stride_fpn = [8, 16, 32]
            self._num_anchors = 2
            self.use_kps = True
        elif len(outputs)==10:
            self.fmc = 5
            self._feat_stride_fpn = [8, 16, 32, 64, 128]
            self._num_anchors = 1
        elif len(outputs)==15:
            self.fmc = 5
            self._feat_stride_fpn = [8, 16, 32, 64, 128]
            self._num_anchors = 1
            self.use_kps = True

    def prepare(self, ctx_id, **kwargs):
        """Configure provider and thresholds.

        A negative ``ctx_id`` selects the CPU provider.  Optional keyword
        arguments ``nms_thresh``, ``det_thresh`` and ``input_size`` override
        the defaults; ``input_size`` is ignored (with a printed warning) when
        the model already fixes one.
        """
        if ctx_id<0:
            self.session.set_providers(['CPUExecutionProvider'])
        nms_thresh = kwargs.get('nms_thresh', None)
        if nms_thresh is not None:
            self.nms_thresh = nms_thresh
        det_thresh = kwargs.get('det_thresh', None)
        if det_thresh is not None:
            self.det_thresh = det_thresh
        input_size = kwargs.get('input_size', None)
        if input_size is not None:
            if self.input_size is not None:
                print('warning: det_size is already set in scrfd model, ignore')
            else:
                self.input_size = input_size

    def forward(self, img, threshold):
        """Run the network on ``img`` and decode candidates per stride.

        Returns three lists with one entry per stride: scores, boxes and
        (when the model has keypoints) keypoints of all candidates whose
        score is at least ``threshold``.
        """
        scores_list = []
        bboxes_list = []
        kpss_list = []
        input_size = tuple(img.shape[0:2][::-1])
        blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
        net_outs = self.session.run(self.output_names, {self.input_name : blob})

        input_height = blob.shape[2]
        input_width = blob.shape[3]
        fmc = self.fmc
        # Outputs are grouped as [scores]*fmc, [bbox]*fmc, [kps]*fmc.
        for idx, stride in enumerate(self._feat_stride_fpn):
            # If the model supports a batch dim, take the first batch element
            if self.batched:
                scores = net_outs[idx][0]
                bbox_preds = net_outs[idx + fmc][0]
                bbox_preds = bbox_preds * stride
                if self.use_kps:
                    kps_preds = net_outs[idx + fmc * 2][0] * stride
            # If the model doesn't support batching, take the output as is
            else:
                scores = net_outs[idx]
                bbox_preds = net_outs[idx + fmc]
                bbox_preds = bbox_preds * stride
                if self.use_kps:
                    kps_preds = net_outs[idx + fmc * 2] * stride

            height = input_height // stride
            width = input_width // stride
            K = height * width
            key = (height, width, stride)
            if key in self.center_cache:
                anchor_centers = self.center_cache[key]
            else:
                # Grid of (x, y) cell indices, scaled to input pixels and
                # flattened; repeated once per anchor at each location.
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)

                anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
                if self._num_anchors>1:
                    anchor_centers = np.stack([anchor_centers]*self._num_anchors, axis=1).reshape( (-1,2) )
                # Cap the cache at 100 distinct grids.
                if len(self.center_cache)<100:
                    self.center_cache[key] = anchor_centers

            pos_inds = np.where(scores>=threshold)[0]
            bboxes = distance2bbox(anchor_centers, bbox_preds)
            pos_scores = scores[pos_inds]
            pos_bboxes = bboxes[pos_inds]
            scores_list.append(pos_scores)
            bboxes_list.append(pos_bboxes)
            if self.use_kps:
                kpss = distance2kps(anchor_centers, kps_preds)
                kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
                pos_kpss = kpss[pos_inds]
                kpss_list.append(pos_kpss)
        return scores_list, bboxes_list, kpss_list

    def detect(self, img, input_size = None, max_num=0, metric='default'):
        """Detect faces in ``img``.

        ``input_size`` is (width, height) of the network input and falls back
        to ``self.input_size``; one of the two must be set.  When ``max_num``
        is positive and more faces survive NMS, the top ``max_num`` are kept,
        ranked by area (``metric='max'``) or by area minus twice the squared
        distance from the image centre (any other ``metric``).

        Returns ``(det, kpss)``: ``det`` has one row per face
        ``[x1, y1, x2, y2, score]`` in original image coordinates; ``kpss``
        is the matching keypoint array or ``None`` without keypoints.
        """
        assert input_size is not None or self.input_size is not None
        input_size = self.input_size if input_size is None else input_size

        # Resize preserving aspect ratio, then pad into the top-left corner
        # of a zero image of the network input size.
        im_ratio = float(img.shape[0]) / img.shape[1]
        model_ratio = float(input_size[1]) / input_size[0]
        if im_ratio>model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = float(new_height) / img.shape[0]
        resized_img = cv2.resize(img, (new_width, new_height))
        det_img = np.zeros( (input_size[1], input_size[0], 3), dtype=np.uint8 )
        det_img[:new_height, :new_width, :] = resized_img

        scores_list, bboxes_list, kpss_list = self.forward(det_img, self.det_thresh)

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        # Undo the resize so coordinates refer to the original image.
        bboxes = np.vstack(bboxes_list) / det_scale
        if self.use_kps:
            kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]
        keep = self.nms(pre_det)
        det = pre_det[keep, :]
        if self.use_kps:
            kpss = kpss[order,:,:]
            kpss = kpss[keep,:,:]
        else:
            kpss = None
        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                    det[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            if metric=='max':
                values = area
            else:
                values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(
                values)[::-1]  # largest ranking value first
            bindex = bindex[0:max_num]
            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]
        return det, kpss

    def nms(self, dets):
        """Greedy non-maximum suppression.

        ``dets`` has rows ``[x1, y1, x2, y2, score]``.  Returns the list of
        row indices to keep, highest score first; a box is dropped when its
        IoU with an already kept box exceeds ``self.nms_thresh``.
        """
        thresh = self.nms_thresh
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        # Widths/heights use the inclusive-pixel (+1) convention.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            # inds index into order[1:], hence the +1 when re-indexing order.
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep
+
def get_scrfd(name, download=False, root='~/.insightface/models', **kwargs):
    """Build an ``SCRFD`` detector.

    With ``download=True``, ``name`` is a model suffix and the file for
    ``"scrfd_<name>"`` is obtained from the model store under ``root``.
    Otherwise ``name`` must be the path of an existing model file.  Extra
    keyword arguments are accepted and ignored.
    """
    if download:
        from .model_store import get_model_file
        _file = get_model_file("scrfd_%s" % name, root=root)
        return SCRFD(_file)
    assert os.path.exists(name)
    return SCRFD(name)
+
+
def scrfd_2p5gkps(**kwargs):
    """Return the ``"2p5gkps"`` SCRFD detector, downloading it if needed.

    Keyword arguments are forwarded to ``get_scrfd``.
    """
    detector = get_scrfd("2p5gkps", download=True, **kwargs)
    return detector
+
+
if __name__ == '__main__':
    # Manual smoke test: detect faces in the sample images, time the call and
    # write annotated copies to ./outputs/.
    detector = SCRFD(model_file='./det.onnx')
    # detect()'s second positional parameter is input_size, not a score
    # threshold, so the fallback input size is configured here instead
    # (ignored with a warning when the model fixes its own size).  The
    # detection threshold already defaults to 0.5.
    detector.prepare(-1, input_size=(640, 640))
    img_paths = ['tests/data/t1.jpg']
    for img_path in img_paths:
        img = cv2.imread(img_path)

        for _ in range(1):
            ta = datetime.datetime.now()
            bboxes, kpss = detector.detect(img)
            tb = datetime.datetime.now()
            print('all cost:', (tb-ta).total_seconds()*1000)
        print(img_path, bboxes.shape)
        if kpss is not None:
            print(kpss.shape)
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            # np.int was removed in NumPy 1.24; use the builtin int and
            # hand plain Python ints to OpenCV.
            x1, y1, x2, y2, score = bbox.astype(int).tolist()
            cv2.rectangle(img, (x1,y1) , (x2,y2) , (255,0,0) , 2)
            if kpss is not None:
                kps = kpss[i]
                for kp in kps:
                    kp = kp.astype(int).tolist()
                    cv2.circle(img, tuple(kp) , 1, (0,0,255) , 2)
        filename = img_path.split('/')[-1]
        print('output:', filename)
        cv2.imwrite('./outputs/%s'%filename, img)
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__init__.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6960431b1bd6db38890e391c4c94dd2182f2e1fd
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__init__.py
@@ -0,0 +1,6 @@
+from __future__ import absolute_import
+
+from .storage import download, ensure_available, download_onnx
+from .filesystem import get_model_dir
+from .filesystem import makedirs, try_import_dali
+from .constant import *
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a9a1d8421a7b2c978abd367b0c52706ca7f4e5d3
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..185cb017258b0287e0a95382a0164d7a3250bbeb
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0300a80d3e6daea94e2c9538747c6ea9378bf844
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3e3b1220b48dfa53bb3671ad110fdcfc91b15fd
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/constant.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab9c27366a94ab842c44304bb216519ac968d0cb
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f866af2607732caf05d2fbb52a429fcd77731a9b
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/download.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a25852709b126462ea4c28e2f9fdc4bb62ec2ff
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb81e06d9ebaa89f1d026b25a1ddf92d30043d96
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/face_align.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b13be309628204bc50e87bd600b97634a8251b0f
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ec57b384c26f75d02e5386006f129c2bd91be0d2
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/filesystem.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c22244b0bbc68ddbd04d0a20b15da638ce0644d3
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dccb21e83c0ae7af60442a86c6d4336ca0ed8954
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/storage.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-310.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..44a6ba41e7b44f55bfbea5bb0a72c9c554edb1f8
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-310.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-39.pyc b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78ea73f1c3e7d8641de4e095c628b9c003c5a095
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/__pycache__/transform.cpython-39.pyc differ
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/constant.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/constant.py
new file mode 100644
index 0000000000000000000000000000000000000000..8860ff077ae7227235591edfc84c0cdc227a6432
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/constant.py
@@ -0,0 +1,3 @@
+
+# Default name constant; presumably "MP" means "model pack" - TODO confirm.
+DEFAULT_MP_NAME = 'buffalo_l'
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/download.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cda84dede45b81dcd99161d87792b6c409fa279
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/download.py
@@ -0,0 +1,95 @@
+"""
+This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/download.py
+"""
+import os
+import hashlib
+import requests
+from tqdm import tqdm
+
+
+def check_sha1(filename, sha1_hash):
+ """Check whether the sha1 hash of the file content matches the expected hash.
+ Parameters
+ ----------
+ filename : str
+ Path to the file.
+ sha1_hash : str
+ Expected sha1 hash in hexadecimal digits.
+ Returns
+ -------
+ bool
+ Whether the file content matches the expected hash.
+ """
+ sha1 = hashlib.sha1()
+ with open(filename, 'rb') as f:
+ while True:
+ data = f.read(1048576)
+ if not data:
+ break
+ sha1.update(data)
+
+ sha1_file = sha1.hexdigest()
+ l = min(len(sha1_file), len(sha1_hash))
+ return sha1.hexdigest()[0:l] == sha1_hash[0:l]
+
+
+def download_file(url, path=None, overwrite=False, sha1_hash=None):
+ """Download an given URL
+ Parameters
+ ----------
+ url : str
+ URL to download
+ path : str, optional
+ Destination path to store downloaded file. By default stores to the
+ current directory with same name as in url.
+ overwrite : bool, optional
+ Whether to overwrite destination file if already exists.
+ sha1_hash : str, optional
+ Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
+ but doesn't match.
+ Returns
+ -------
+ str
+ The file path of the downloaded file.
+ """
+ if path is None:
+ fname = url.split('/')[-1]
+ else:
+ path = os.path.expanduser(path)
+ if os.path.isdir(path):
+ fname = os.path.join(path, url.split('/')[-1])
+ else:
+ fname = path
+
+ if overwrite or not os.path.exists(fname) or (
+ sha1_hash and not check_sha1(fname, sha1_hash)):
+ dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+
+ print('Downloading %s from %s...' % (fname, url))
+ r = requests.get(url, stream=True)
+ if r.status_code != 200:
+ raise RuntimeError("Failed downloading url %s" % url)
+ total_length = r.headers.get('content-length')
+ with open(fname, 'wb') as f:
+ if total_length is None: # no content length header
+ for chunk in r.iter_content(chunk_size=1024):
+ if chunk: # filter out keep-alive new chunks
+ f.write(chunk)
+ else:
+ total_length = int(total_length)
+ for chunk in tqdm(r.iter_content(chunk_size=1024),
+ total=int(total_length / 1024. + 0.5),
+ unit='KB',
+ unit_scale=False,
+ dynamic_ncols=True):
+ f.write(chunk)
+
+ if sha1_hash and not check_sha1(fname, sha1_hash):
+ raise UserWarning('File {} is downloaded but the content hash does not match. ' \
+ 'The repo may be outdated or download may be incomplete. ' \
+ 'If the "repo_url" is overridden, consider switching to ' \
+ 'the default repo.'.format(fname))
+
+ return fname
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/face_align.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/face_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..226628b39cf743947df230feffbb97bf5c585e1d
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/face_align.py
@@ -0,0 +1,103 @@
+import cv2
+import numpy as np
+from skimage import transform as trans
+
+
+# Five (x, y) destination landmarks used by estimate_norm as the alignment
+# target. estimate_norm scales them by image_size / 112 (or / 128 plus an
+# x offset), so the values are laid out for a 112-pixel crop.
+# NOTE(review): point order is presumably eyes, nose, mouth corners - confirm.
+arcface_dst = np.array(
+ [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
+ [41.5493, 92.3655], [70.7299, 92.2041]],
+ dtype=np.float32)
+
+def estimate_norm(lmk, image_size=112,mode='arcface'):
+ assert lmk.shape == (5, 2)
+ assert image_size%112==0 or image_size%128==0
+ if image_size%112==0:
+ ratio = float(image_size)/112.0
+ diff_x = 0
+ else:
+ ratio = float(image_size)/128.0
+ diff_x = 8.0*ratio
+ dst = arcface_dst * ratio
+ dst[:,0] += diff_x
+ tform = trans.SimilarityTransform()
+ tform.estimate(lmk, dst)
+ M = tform.params[0:2, :]
+ return M
+
+def norm_crop(img, landmark, image_size=112, mode='arcface'):
+ M = estimate_norm(landmark, image_size, mode)
+ warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
+ return warped
+
+def norm_crop2(img, landmark, image_size=112, mode='arcface'):
+ M = estimate_norm(landmark, image_size, mode)
+ warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
+ return warped, M
+
+def square_crop(im, S):
+ if im.shape[0] > im.shape[1]:
+ height = S
+ width = int(float(im.shape[1]) / im.shape[0] * S)
+ scale = float(S) / im.shape[0]
+ else:
+ width = S
+ height = int(float(im.shape[0]) / im.shape[1] * S)
+ scale = float(S) / im.shape[1]
+ resized_im = cv2.resize(im, (width, height))
+ det_im = np.zeros((S, S, 3), dtype=np.uint8)
+ det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im
+ return det_im, scale
+
+
+def transform(data, center, output_size, scale, rotation):
+ scale_ratio = scale
+ rot = float(rotation) * np.pi / 180.0
+ #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
+ t1 = trans.SimilarityTransform(scale=scale_ratio)
+ cx = center[0] * scale_ratio
+ cy = center[1] * scale_ratio
+ t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
+ t3 = trans.SimilarityTransform(rotation=rot)
+ t4 = trans.SimilarityTransform(translation=(output_size / 2,
+ output_size / 2))
+ t = t1 + t2 + t3 + t4
+ M = t.params[0:2]
+ cropped = cv2.warpAffine(data,
+ M, (output_size, output_size),
+ borderValue=0.0)
+ return cropped, M
+
+
+def trans_points2d(pts, M):
+ new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
+ for i in range(pts.shape[0]):
+ pt = pts[i]
+ new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+ new_pt = np.dot(M, new_pt)
+ #print('new_pt', new_pt.shape, new_pt)
+ new_pts[i] = new_pt[0:2]
+
+ return new_pts
+
+
+def trans_points3d(pts, M):
+ scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
+ #print(scale)
+ new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
+ for i in range(pts.shape[0]):
+ pt = pts[i]
+ new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+ new_pt = np.dot(M, new_pt)
+ #print('new_pt', new_pt.shape, new_pt)
+ new_pts[i][0:2] = new_pt[0:2]
+ new_pts[i][2] = pts[i][2] * scale
+
+ return new_pts
+
+
+def trans_points(pts, M):
+ if pts.shape[1] == 2:
+ return trans_points2d(pts, M)
+ else:
+ return trans_points3d(pts, M)
+
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/filesystem.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/filesystem.py
new file mode 100644
index 0000000000000000000000000000000000000000..01e3851975bdcbbf7f5eeb7e68e70a36dc040535
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/filesystem.py
@@ -0,0 +1,157 @@
+"""
+This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/filesystem.py
+"""
+import os
+import os.path as osp
+import errno
+
+
+def get_model_dir(name, root='~/.insightface'):
+ root = os.path.expanduser(root)
+ model_dir = osp.join(root, 'models', name)
+ return model_dir
+
+def makedirs(path):
+ """Create directory recursively if not exists.
+ Similar to `makedir -p`, you can skip checking existence before this function.
+
+ Parameters
+ ----------
+ path : str
+ Path of the desired dir
+ """
+ try:
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno != errno.EEXIST:
+ raise
+
+
+def try_import(package, message=None):
+ """Try import specified package, with custom message support.
+
+ Parameters
+ ----------
+ package : str
+ The name of the targeting package.
+ message : str, default is None
+ If not None, this function will raise customized error message when import error is found.
+
+
+ Returns
+ -------
+ module if found, raise ImportError otherwise
+
+ """
+ try:
+ return __import__(package)
+ except ImportError as e:
+ if not message:
+ raise e
+ raise ImportError(message)
+
+
+def try_import_cv2():
+ """Try import cv2 at runtime.
+
+ Returns
+ -------
+ cv2 module if found. Raise ImportError otherwise
+
+ """
+ msg = "cv2 is required, you can install by package manager, e.g. 'apt-get', \
+ or `pip install opencv-python --user` (note that this is unofficial PYPI package)."
+
+ return try_import('cv2', msg)
+
+
+def try_import_mmcv():
+ """Try import mmcv at runtime.
+
+ Returns
+ -------
+ mmcv module if found. Raise ImportError otherwise
+
+ """
+ msg = "mmcv is required, you can install by first `pip install Cython --user` \
+ and then `pip install mmcv --user` (note that this is unofficial PYPI package)."
+
+ return try_import('mmcv', msg)
+
+
+def try_import_rarfile():
+ """Try import rarfile at runtime.
+
+ Returns
+ -------
+ rarfile module if found. Raise ImportError otherwise
+
+ """
+ msg = "rarfile is required, you can install by first `sudo apt-get install unrar` \
+ and then `pip install rarfile --user` (note that this is unofficial PYPI package)."
+
+ return try_import('rarfile', msg)
+
+
+def import_try_install(package, extern_url=None):
+ """Try import the specified package.
+ If the package not installed, try use pip to install and import if success.
+
+ Parameters
+ ----------
+ package : str
+ The name of the package trying to import.
+ extern_url : str or None, optional
+ The external url if package is not hosted on PyPI.
+ For example, you can install a package using:
+ "pip install git+http://github.com/user/repo/tarball/master/egginfo=xxx".
+ In this case, you can pass the url to the extern_url.
+
+ Returns
+ -------
+
+ The imported python module.
+
+ """
+ try:
+ return __import__(package)
+ except ImportError:
+ try:
+ from pip import main as pipmain
+ except ImportError:
+ from pip._internal import main as pipmain
+
+ # trying to install package
+ url = package if extern_url is None else extern_url
+ pipmain(['install', '--user',
+ url]) # will raise SystemExit Error if fails
+
+ # trying to load again
+ try:
+ return __import__(package)
+ except ImportError:
+ import sys
+ import site
+ user_site = site.getusersitepackages()
+ if user_site not in sys.path:
+ sys.path.append(user_site)
+ return __import__(package)
+ return __import__(package)
+
+
+def try_import_dali():
+ """Try import NVIDIA DALI at runtime.
+ """
+ try:
+ dali = __import__('nvidia.dali', fromlist=['pipeline', 'ops', 'types'])
+ dali.Pipeline = dali.pipeline.Pipeline
+ except ImportError:
+
+ class dali:
+ class Pipeline:
+ def __init__(self):
+ raise NotImplementedError(
+ "DALI not found, please check if you installed it correctly."
+ )
+
+ return dali
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/storage.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bf37e2d17b28dee2a8839484778815f87fc4a9c
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/storage.py
@@ -0,0 +1,52 @@
+
+import os
+import os.path as osp
+import zipfile
+from .download import download_file
+
+# Base URL that download() and download_onnx() prefix to "<name>.zip" or the
+# model file name when building the download URL.
+BASE_REPO_URL = 'https://github.com/deepinsight/insightface/releases/download/v0.7'
+
+def download(sub_dir, name, force=False, root='~/.insightface'):
+ _root = os.path.expanduser(root)
+ dir_path = os.path.join(_root, sub_dir, name)
+ if osp.exists(dir_path) and not force:
+ return dir_path
+ print('download_path:', dir_path)
+ zip_file_path = os.path.join(_root, sub_dir, name + '.zip')
+ model_url = "%s/%s.zip"%(BASE_REPO_URL, name)
+ download_file(model_url,
+ path=zip_file_path,
+ overwrite=True)
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path)
+ with zipfile.ZipFile(zip_file_path) as zf:
+ zf.extractall(dir_path)
+ #os.remove(zip_file_path)
+ return dir_path
+
+def ensure_available(sub_dir, name, root='~/.insightface'):
+ return download(sub_dir, name, force=False, root=root)
+
+def download_onnx(sub_dir, model_file, force=False, root='~/.insightface', download_zip=False):
+ _root = os.path.expanduser(root)
+ model_root = osp.join(_root, sub_dir)
+ new_model_file = osp.join(model_root, model_file)
+ if osp.exists(new_model_file) and not force:
+ return new_model_file
+ if not osp.exists(model_root):
+ os.makedirs(model_root)
+ print('download_path:', new_model_file)
+ if not download_zip:
+ model_url = "%s/%s"%(BASE_REPO_URL, model_file)
+ download_file(model_url,
+ path=new_model_file,
+ overwrite=True)
+ else:
+ model_url = "%s/%s.zip"%(BASE_REPO_URL, model_file)
+ zip_file_path = new_model_file+".zip"
+ download_file(model_url,
+ path=zip_file_path,
+ overwrite=True)
+ with zipfile.ZipFile(zip_file_path) as zf:
+ zf.extractall(model_root)
+ return new_model_file
diff --git a/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/transform.py b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..06531d257b694211a0b9a09c9d741b9b2ff53bfe
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/dependencies/insightface/utils/transform.py
@@ -0,0 +1,116 @@
+import cv2
+import math
+import numpy as np
+from skimage import transform as trans
+
+
+def transform(data, center, output_size, scale, rotation):
+ scale_ratio = scale
+ rot = float(rotation) * np.pi / 180.0
+ #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
+ t1 = trans.SimilarityTransform(scale=scale_ratio)
+ cx = center[0] * scale_ratio
+ cy = center[1] * scale_ratio
+ t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
+ t3 = trans.SimilarityTransform(rotation=rot)
+ t4 = trans.SimilarityTransform(translation=(output_size / 2,
+ output_size / 2))
+ t = t1 + t2 + t3 + t4
+ M = t.params[0:2]
+ cropped = cv2.warpAffine(data,
+ M, (output_size, output_size),
+ borderValue=0.0)
+ return cropped, M
+
+
+def trans_points2d(pts, M):
+ new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
+ for i in range(pts.shape[0]):
+ pt = pts[i]
+ new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+ new_pt = np.dot(M, new_pt)
+ #print('new_pt', new_pt.shape, new_pt)
+ new_pts[i] = new_pt[0:2]
+
+ return new_pts
+
+
+def trans_points3d(pts, M):
+ scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
+ #print(scale)
+ new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
+ for i in range(pts.shape[0]):
+ pt = pts[i]
+ new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+ new_pt = np.dot(M, new_pt)
+ #print('new_pt', new_pt.shape, new_pt)
+ new_pts[i][0:2] = new_pt[0:2]
+ new_pts[i][2] = pts[i][2] * scale
+
+ return new_pts
+
+
+def trans_points(pts, M):
+ if pts.shape[1] == 2:
+ return trans_points2d(pts, M)
+ else:
+ return trans_points3d(pts, M)
+
+def estimate_affine_matrix_3d23d(X, Y):
+ ''' Using least-squares solution
+ Args:
+ X: [n, 3]. 3d points(fixed)
+ Y: [n, 3]. corresponding 3d points(moving). Y = PX
+ Returns:
+ P_Affine: (3, 4). Affine camera matrix (the third row is [0, 0, 0, 1]).
+ '''
+ X_homo = np.hstack((X, np.ones([X.shape[0],1]))) #n x 4
+ P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
+ return P
+
+def P2sRt(P):
+ ''' decompositing camera matrix P
+ Args:
+ P: (3, 4). Affine Camera Matrix.
+ Returns:
+ s: scale factor.
+ R: (3, 3). rotation matrix.
+ t: (3,). translation.
+ '''
+ t = P[:, 3]
+ R1 = P[0:1, :3]
+ R2 = P[1:2, :3]
+ s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2.0
+ r1 = R1/np.linalg.norm(R1)
+ r2 = R2/np.linalg.norm(R2)
+ r3 = np.cross(r1, r2)
+
+ R = np.concatenate((r1, r2, r3), 0)
+ return s, R, t
+
+def matrix2angle(R):
+ ''' get three Euler angles from Rotation Matrix
+ Args:
+ R: (3,3). rotation matrix
+ Returns:
+ x: pitch
+ y: yaw
+ z: roll
+ '''
+ sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
+
+ singular = sy < 1e-6
+
+ if not singular :
+ x = math.atan2(R[2,1] , R[2,2])
+ y = math.atan2(-R[2,0], sy)
+ z = math.atan2(R[1,0], R[0,0])
+ else :
+ x = math.atan2(-R[1,2], R[1,1])
+ y = math.atan2(-R[2,0], sy)
+ z = 0
+
+ # rx, ry, rz = np.rad2deg(x), np.rad2deg(y), np.rad2deg(z)
+ rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi
+ return rx, ry, rz
+
diff --git a/subprocess/LivePortrait/src/utils/face_analysis_diy.py b/subprocess/LivePortrait/src/utils/face_analysis_diy.py
new file mode 100644
index 0000000000000000000000000000000000000000..f13a659134216958da3c7273aabf3b0f96fb320d
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/face_analysis_diy.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+
+"""
+face detection and alignment using InsightFace
+"""
+
+import numpy as np
+from .rprint import rlog as log
+from .dependencies.insightface.app import FaceAnalysis
+from .dependencies.insightface.app.common import Face
+from .timer import Timer
+
+
+def sort_by_direction(faces, direction: str = 'large-small', face_center=None):
+ if len(faces) <= 0:
+ return faces
+
+ if direction == 'left-right':
+ return sorted(faces, key=lambda face: face['bbox'][0])
+ if direction == 'right-left':
+ return sorted(faces, key=lambda face: face['bbox'][0], reverse=True)
+ if direction == 'top-bottom':
+ return sorted(faces, key=lambda face: face['bbox'][1])
+ if direction == 'bottom-top':
+ return sorted(faces, key=lambda face: face['bbox'][1], reverse=True)
+ if direction == 'small-large':
+ return sorted(faces, key=lambda face: (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1]))
+ if direction == 'large-small':
+ return sorted(faces, key=lambda face: (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1]), reverse=True)
+ if direction == 'distance-from-retarget-face':
+ return sorted(faces, key=lambda face: (((face['bbox'][2]+face['bbox'][0])/2-face_center[0])**2+((face['bbox'][3]+face['bbox'][1])/2-face_center[1])**2)**0.5)
+ return faces
+
+
+class FaceAnalysisDIY(FaceAnalysis):
+ def __init__(self, name='buffalo_l', root='~/.insightface', allowed_modules=None, **kwargs):
+ super().__init__(name=name, root=root, allowed_modules=allowed_modules, **kwargs)
+
+ self.timer = Timer()
+
+ def get(self, img_bgr, **kwargs):
+ max_num = kwargs.get('max_face_num', 0) # the number of the detected faces, 0 means no limit
+ flag_do_landmark_2d_106 = kwargs.get('flag_do_landmark_2d_106', True) # whether to do 106-point detection
+ direction = kwargs.get('direction', 'large-small') # sorting direction
+ face_center = None
+
+ bboxes, kpss = self.det_model.detect(img_bgr, max_num=max_num, metric='default')
+ if bboxes.shape[0] == 0:
+ return []
+ ret = []
+ for i in range(bboxes.shape[0]):
+ bbox = bboxes[i, 0:4]
+ det_score = bboxes[i, 4]
+ kps = None
+ if kpss is not None:
+ kps = kpss[i]
+ face = Face(bbox=bbox, kps=kps, det_score=det_score)
+ for taskname, model in self.models.items():
+ if taskname == 'detection':
+ continue
+
+ if (not flag_do_landmark_2d_106) and taskname == 'landmark_2d_106':
+ continue
+
+ # print(f'taskname: {taskname}')
+ model.get(img_bgr, face)
+ ret.append(face)
+
+ ret = sort_by_direction(ret, direction, face_center)
+ return ret
+
+ def warmup(self):
+ self.timer.tic()
+
+ img_bgr = np.zeros((512, 512, 3), dtype=np.uint8)
+ self.get(img_bgr)
+
+ elapse = self.timer.toc()
+ log(f'FaceAnalysisDIY warmup time: {elapse:.3f}s')
diff --git a/subprocess/LivePortrait/src/utils/helper.py b/subprocess/LivePortrait/src/utils/helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e2af94e137b6447c88ec4df3c7c2c1b1bd94b8a
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/helper.py
@@ -0,0 +1,145 @@
+# coding: utf-8
+
+"""
+utility functions and classes to handle feature extraction and model loading
+"""
+
+import os
+import os.path as osp
+import torch
+from collections import OrderedDict
+
+from ..modules.spade_generator import SPADEDecoder
+from ..modules.warping_network import WarpingNetwork
+from ..modules.motion_extractor import MotionExtractor
+from ..modules.appearance_feature_extractor import AppearanceFeatureExtractor
+from ..modules.stitching_retargeting_network import StitchingRetargetingNetwork
+
+
+def suffix(filename):
+ """a.jpg -> jpg"""
+ pos = filename.rfind(".")
+ if pos == -1:
+ return ""
+ return filename[pos + 1:]
+
+
+def prefix(filename):
+ """a.jpg -> a"""
+ pos = filename.rfind(".")
+ if pos == -1:
+ return filename
+ return filename[:pos]
+
+
+def basename(filename):
+ """a/b/c.jpg -> c"""
+ return prefix(osp.basename(filename))
+
+
+def remove_suffix(filepath):
+ """a/b/c.jpg -> a/b/c"""
+ return osp.join(osp.dirname(filepath), basename(filepath))
+
+
+def is_video(file_path):
+ if file_path.lower().endswith((".mp4", ".mov", ".avi", ".webm")) or osp.isdir(file_path):
+ return True
+ return False
+
+
+def is_template(file_path):
+ if file_path.endswith(".pkl"):
+ return True
+ return False
+
+
+def mkdir(d, log=False):
+ # return self-assined `d`, for one line code
+ if not osp.exists(d):
+ os.makedirs(d, exist_ok=True)
+ if log:
+ print(f"Make dir: {d}")
+ return d
+
+
+def squeeze_tensor_to_numpy(tensor):
+ out = tensor.data.squeeze(0).cpu().numpy()
+ return out
+
+
+def dct2device(dct: dict, device):
+ for key in dct:
+ dct[key] = torch.tensor(dct[key]).to(device)
+ return dct
+
+
+def concat_feat(kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
+ """
+ kp_source: (bs, k, 3)
+ kp_driving: (bs, k, 3)
+ Return: (bs, 2k*3)
+ """
+ bs_src = kp_source.shape[0]
+ bs_dri = kp_driving.shape[0]
+ assert bs_src == bs_dri, 'batch size must be equal'
+
+ feat = torch.cat([kp_source.view(bs_src, -1), kp_driving.view(bs_dri, -1)], dim=1)
+ return feat
+
+
+def remove_ddp_dumplicate_key(state_dict):
+ state_dict_new = OrderedDict()
+ for key in state_dict.keys():
+ state_dict_new[key.replace('module.', '')] = state_dict[key]
+ return state_dict_new
+
+
+def load_model(ckpt_path, model_config, device, model_type):
+ model_params = model_config['model_params'][f'{model_type}_params']
+
+ if model_type == 'appearance_feature_extractor':
+ model = AppearanceFeatureExtractor(**model_params).to(device)
+ elif model_type == 'motion_extractor':
+ model = MotionExtractor(**model_params).to(device)
+ elif model_type == 'warping_module':
+ model = WarpingNetwork(**model_params).to(device)
+ elif model_type == 'spade_generator':
+ model = SPADEDecoder(**model_params).to(device)
+ elif model_type == 'stitching_retargeting_module':
+ # Special handling for stitching and retargeting module
+ config = model_config['model_params']['stitching_retargeting_module_params']
+ checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
+
+ stitcher = StitchingRetargetingNetwork(**config.get('stitching'))
+ stitcher.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_shoulder']))
+ stitcher = stitcher.to(device)
+ stitcher.eval()
+
+ retargetor_lip = StitchingRetargetingNetwork(**config.get('lip'))
+ retargetor_lip.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_mouth']))
+ retargetor_lip = retargetor_lip.to(device)
+ retargetor_lip.eval()
+
+ retargetor_eye = StitchingRetargetingNetwork(**config.get('eye'))
+ retargetor_eye.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_eye']))
+ retargetor_eye = retargetor_eye.to(device)
+ retargetor_eye.eval()
+
+ return {
+ 'stitching': stitcher,
+ 'lip': retargetor_lip,
+ 'eye': retargetor_eye
+ }
+ else:
+ raise ValueError(f"Unknown model type: {model_type}")
+
+ model.load_state_dict(torch.load(ckpt_path, map_location=lambda storage, loc: storage))
+ model.eval()
+ return model
+
+
+def load_description(fp):
+ with open(fp, 'r', encoding='utf-8') as f:
+ content = f.read()
+ return content
diff --git a/subprocess/LivePortrait/src/utils/io.py b/subprocess/LivePortrait/src/utils/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..28c2d99f09421fc9eb1f6475419cb1c6e6dcd028
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/io.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+
+import os
+from glob import glob
+import os.path as osp
+import imageio
+import numpy as np
+import pickle
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+
+from .helper import mkdir, suffix
+
+
+def load_image_rgb(image_path: str):
+ if not osp.exists(image_path):
+ raise FileNotFoundError(f"Image not found: {image_path}")
+ img = cv2.imread(image_path, cv2.IMREAD_COLOR)
+ return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+
+def load_driving_info(driving_info):
+ driving_video_ori = []
+
+ def load_images_from_directory(directory):
+ image_paths = sorted(glob(osp.join(directory, '*.png')) + glob(osp.join(directory, '*.jpg')))
+ return [load_image_rgb(im_path) for im_path in image_paths]
+
+ def load_images_from_video(file_path):
+ reader = imageio.get_reader(file_path, "ffmpeg")
+ return [image for _, image in enumerate(reader)]
+
+ if osp.isdir(driving_info):
+ driving_video_ori = load_images_from_directory(driving_info)
+ elif osp.isfile(driving_info):
+ driving_video_ori = load_images_from_video(driving_info)
+
+ return driving_video_ori
+
+
+def contiguous(obj):
+ if not obj.flags.c_contiguous:
+ obj = obj.copy(order="C")
+ return obj
+
+
+def resize_to_limit(img: np.ndarray, max_dim=1920, division=2):
+ """
+ ajust the size of the image so that the maximum dimension does not exceed max_dim, and the width and the height of the image are multiples of n.
+ :param img: the image to be processed.
+ :param max_dim: the maximum dimension constraint.
+ :param n: the number that needs to be multiples of.
+ :return: the adjusted image.
+ """
+ h, w = img.shape[:2]
+
+ # ajust the size of the image according to the maximum dimension
+ if max_dim > 0 and max(h, w) > max_dim:
+ if h > w:
+ new_h = max_dim
+ new_w = int(w * (max_dim / h))
+ else:
+ new_w = max_dim
+ new_h = int(h * (max_dim / w))
+ img = cv2.resize(img, (new_w, new_h))
+
+ # ensure that the image dimensions are multiples of n
+ division = max(division, 1)
+ new_h = img.shape[0] - (img.shape[0] % division)
+ new_w = img.shape[1] - (img.shape[1] % division)
+
+ if new_h == 0 or new_w == 0:
+ # when the width or height is less than n, no need to process
+ return img
+
+ if new_h != img.shape[0] or new_w != img.shape[1]:
+ img = img[:new_h, :new_w]
+
+ return img
+
+
+def load_img_online(obj, mode="bgr", **kwargs):
+ max_dim = kwargs.get("max_dim", 1920)
+ n = kwargs.get("n", 2)
+ if isinstance(obj, str):
+ if mode.lower() == "gray":
+ img = cv2.imread(obj, cv2.IMREAD_GRAYSCALE)
+ else:
+ img = cv2.imread(obj, cv2.IMREAD_COLOR)
+ else:
+ img = obj
+
+ # Resize image to satisfy constraints
+ img = resize_to_limit(img, max_dim=max_dim, division=n)
+
+ if mode.lower() == "bgr":
+ return contiguous(img)
+ elif mode.lower() == "rgb":
+ return contiguous(img[..., ::-1])
+ else:
+ raise Exception(f"Unknown mode {mode}")
+
+
+def load(fp):
+ suffix_ = suffix(fp)
+
+ if suffix_ == "npy":
+ return np.load(fp)
+ elif suffix_ == "pkl":
+ return pickle.load(open(fp, "rb"))
+ else:
+ raise Exception(f"Unknown type: {suffix}")
+
+
+def dump(wfp, obj):
+ wd = osp.split(wfp)[0]
+ if wd != "" and not osp.exists(wd):
+ mkdir(wd)
+
+ _suffix = suffix(wfp)
+ if _suffix == "npy":
+ np.save(wfp, obj)
+ elif _suffix == "pkl":
+ pickle.dump(obj, open(wfp, "wb"))
+ else:
+ raise Exception("Unknown type: {}".format(_suffix))
diff --git a/subprocess/LivePortrait/src/utils/landmark_runner.py b/subprocess/LivePortrait/src/utils/landmark_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..7680a2c4a65ebe7f4dadbafc4a35603ab9f90be6
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/landmark_runner.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+
+import os.path as osp
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+import torch
+import numpy as np
+import onnxruntime
+from .timer import Timer
+from .rprint import rlog
+from .crop import crop_image, _transform_pts
+
+
+def make_abs_path(fn):
+ return osp.join(osp.dirname(osp.realpath(__file__)), fn)
+
+
+def to_ndarray(obj):
+ if isinstance(obj, torch.Tensor):
+ return obj.cpu().numpy()
+ elif isinstance(obj, np.ndarray):
+ return obj
+ else:
+ return np.array(obj)
+
+
+class LandmarkRunner(object):
+ """landmark runner"""
+
+ def __init__(self, **kwargs):
+ ckpt_path = kwargs.get('ckpt_path')
+ onnx_provider = kwargs.get('onnx_provider', 'cuda') # 默认用cuda
+ device_id = kwargs.get('device_id', 0)
+ self.dsize = kwargs.get('dsize', 224)
+ self.timer = Timer()
+
+ if onnx_provider.lower() == 'cuda':
+ self.session = onnxruntime.InferenceSession(
+ ckpt_path, providers=[
+ ('CUDAExecutionProvider', {'device_id': device_id})
+ ]
+ )
+ else:
+ opts = onnxruntime.SessionOptions()
+ opts.intra_op_num_threads = 4 # 默认线程数为 4
+ self.session = onnxruntime.InferenceSession(
+ ckpt_path, providers=['CPUExecutionProvider'],
+ sess_options=opts
+ )
+
+ def _run(self, inp):
+ out = self.session.run(None, {'input': inp})
+ return out
+
+ def run(self, img_rgb: np.ndarray, lmk=None):
+ if lmk is not None:
+ crop_dct = crop_image(img_rgb, lmk, dsize=self.dsize, scale=1.5, vy_ratio=-0.1)
+ img_crop_rgb = crop_dct['img_crop']
+ else:
+ # NOTE: force resize to 224x224, NOT RECOMMEND!
+ img_crop_rgb = cv2.resize(img_rgb, (self.dsize, self.dsize))
+ scale = max(img_rgb.shape[:2]) / self.dsize
+ crop_dct = {
+ 'M_c2o': np.array([
+ [scale, 0., 0.],
+ [0., scale, 0.],
+ [0., 0., 1.],
+ ], dtype=np.float32),
+ }
+
+ inp = (img_crop_rgb.astype(np.float32) / 255.).transpose(2, 0, 1)[None, ...] # HxWx3 (BGR) -> 1x3xHxW (RGB!)
+
+ out_lst = self._run(inp)
+ out_pts = out_lst[2]
+
+ # 2d landmarks 203 points
+ lmk = to_ndarray(out_pts[0]).reshape(-1, 2) * self.dsize # scale to 0-224
+ lmk = _transform_pts(lmk, M=crop_dct['M_c2o'])
+
+ return lmk
+
+ def warmup(self):
+ self.timer.tic()
+
+ dummy_image = np.zeros((1, 3, self.dsize, self.dsize), dtype=np.float32)
+
+ _ = self._run(dummy_image)
+
+ elapse = self.timer.toc()
+ rlog(f'LandmarkRunner warmup time: {elapse:.3f}s')
diff --git a/subprocess/LivePortrait/src/utils/resources/mask_template.png b/subprocess/LivePortrait/src/utils/resources/mask_template.png
new file mode 100644
index 0000000000000000000000000000000000000000..bca6ca5977ba820d0d2c05b3793c6231cc82e715
Binary files /dev/null and b/subprocess/LivePortrait/src/utils/resources/mask_template.png differ
diff --git a/subprocess/LivePortrait/src/utils/retargeting_utils.py b/subprocess/LivePortrait/src/utils/retargeting_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae2e5f52effe8107503586c9f5a24f39dfdbbbcf
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/retargeting_utils.py
@@ -0,0 +1,24 @@
+
+"""
+Functions to compute distance ratios between specific pairs of facial landmarks
+"""
+
+import numpy as np
+
+
+def calculate_distance_ratio(lmk: np.ndarray, idx1: int, idx2: int, idx3: int, idx4: int, eps: float = 1e-6) -> np.ndarray:
+ return (np.linalg.norm(lmk[:, idx1] - lmk[:, idx2], axis=1, keepdims=True) /
+ (np.linalg.norm(lmk[:, idx3] - lmk[:, idx4], axis=1, keepdims=True) + eps))
+
+
+def calc_eye_close_ratio(lmk: np.ndarray, target_eye_ratio: np.ndarray = None) -> np.ndarray:
+ lefteye_close_ratio = calculate_distance_ratio(lmk, 6, 18, 0, 12)
+ righteye_close_ratio = calculate_distance_ratio(lmk, 30, 42, 24, 36)
+ if target_eye_ratio is not None:
+ return np.concatenate([lefteye_close_ratio, righteye_close_ratio, target_eye_ratio], axis=1)
+ else:
+ return np.concatenate([lefteye_close_ratio, righteye_close_ratio], axis=1)
+
+
+def calc_lip_close_ratio(lmk: np.ndarray) -> np.ndarray:
+ return calculate_distance_ratio(lmk, 90, 102, 48, 66)
diff --git a/subprocess/LivePortrait/src/utils/rprint.py b/subprocess/LivePortrait/src/utils/rprint.py
new file mode 100644
index 0000000000000000000000000000000000000000..c43a42f9855bbb019725e6c2b6c6c50e6fa4d0c5
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/rprint.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+
+"""
+custom print and log functions
+"""
+
+__all__ = ['rprint', 'rlog']
+
# `rich` is an optional dependency: use its console when it can be imported,
# otherwise fall back to the builtin print for both helpers.
try:
    from rich.console import Console
except ImportError:
    # Only a missing/unimportable `rich` triggers the fallback; other
    # exceptions (including KeyboardInterrupt) are no longer swallowed.
    rprint = print
    rlog = print
else:
    console = Console()
    rprint = console.print
    rlog = console.log
diff --git a/subprocess/LivePortrait/src/utils/timer.py b/subprocess/LivePortrait/src/utils/timer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3570fa45d3ff36376471b82a5b3c02efe46eed98
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/timer.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+
+"""
+tools to measure elapsed time
+"""
+
+import time
+
class Timer(object):
    """A simple stopwatch: ``tic()`` starts it, ``toc()`` reads the elapsed time.

    Attributes:
        total_time: sum of all intervals returned by ``toc()`` since creation
            or the last ``clear()``.
        calls: number of ``toc()`` calls since creation or the last ``clear()``.
        start_time: ``time.time()`` value recorded by the last ``tic()``.
        diff: the interval measured by the last ``toc()``, in seconds.
    """

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.

    def tic(self):
        """Record the current time as the start of the interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Return the seconds elapsed since the last ``tic()``.

        Args:
            average: accepted for backward compatibility only; it is ignored
                and the most recent interval is always returned.

        Returns:
            The elapsed time in seconds (also stored in ``self.diff``).
        """
        self.diff = time.time() - self.start_time
        # Keep the bookkeeping attributes meaningful for callers that read them.
        self.total_time += self.diff
        self.calls += 1
        return self.diff

    def clear(self):
        """Reset the timer to its freshly-constructed state."""
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
diff --git a/subprocess/LivePortrait/src/utils/video.py b/subprocess/LivePortrait/src/utils/video.py
new file mode 100644
index 0000000000000000000000000000000000000000..a69841238f27f0259a67f0290eb5807dd2087efa
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/video.py
@@ -0,0 +1,212 @@
+# coding: utf-8
+
+"""
+functions for processing video
+"""
+
+import os.path as osp
+import numpy as np
+import subprocess
+import imageio
+import cv2
+
+from .rprint import rlog as log
+
+# try:
+# import ffmpeg
+# except ImportError as e:
+# log(f'Try to install ffmpeg by: pip install ffmpeg-python==0.2.0', style='bold red')
+# raise(e)
+
+from rich.progress import track
+from .helper import prefix
+from .rprint import rprint as print
+
+
+
def exec_cmd(cmd):
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        cmd: either a command line given as a single string, which is executed
            through the shell (the original behaviour), or a sequence of
            arguments, which is executed directly without a shell so that
            arguments containing spaces or shell metacharacters are safe.

    Returns:
        The ``subprocess.CompletedProcess``; its ``stdout`` holds the combined
        stdout/stderr bytes of the command.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    # Only plain command-line strings need the shell to be split into arguments.
    use_shell = isinstance(cmd, (str, bytes))
    return subprocess.run(cmd, shell=use_shell, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+
def images2video(images, wfp, **kwargs):
    """Encode a sequence of frames into a video file with imageio.

    Args:
        images: indexable sequence of frames (``len()`` and ``images[i]`` are used).
        wfp: path of the video file to write.
        **kwargs: optional settings --
            fps (default 30), format (default 'mp4'), codec (default 'libx264'),
            quality (default None), pixelformat (default 'yuv420p'),
            image_mode ('rgb' by default; 'bgr' reverses the last axis of each
            frame before writing), macro_block_size (default 2),
            crf (default 18, passed to ffmpeg as ``-crf``).
    """
    fps = kwargs.get('fps', 30)
    video_format = kwargs.get('format', 'mp4')  # default is mp4 format
    codec = kwargs.get('codec', 'libx264')  # default is libx264 encoding
    quality = kwargs.get('quality')  # video quality
    pixelformat = kwargs.get('pixelformat', 'yuv420p')  # video pixel format
    image_mode = kwargs.get('image_mode', 'rgb')
    macro_block_size = kwargs.get('macro_block_size', 2)
    ffmpeg_params = ['-crf', str(kwargs.get('crf', 18))]

    # The colour mode does not change between frames, so decide once.
    reverse_channels = image_mode.lower() == 'bgr'

    writer = imageio.get_writer(
        wfp, fps=fps, format=video_format,
        codec=codec, quality=quality, ffmpeg_params=ffmpeg_params, pixelformat=pixelformat, macro_block_size=macro_block_size
    )

    try:
        n = len(images)
        for i in track(range(n), description='Writing', transient=True):
            frame = images[i]
            writer.append_data(frame[..., ::-1] if reverse_channels else frame)
    finally:
        # Always release the writer, even if a frame fails to encode.
        writer.close()
+
+
def video2gif(video_fp, fps=30, size=256):
    """Convert a video to a GIF next to it, using ffmpeg's two-pass palette method.

    A ``palette.png`` and ``<name>.gif`` are written into the directory of
    ``video_fp``. If ``video_fp`` does not exist, a message is printed and
    nothing else happens.

    Args:
        video_fp: path of the input video.
        fps: frame rate of the GIF.
        size: output width in pixels (height follows the aspect ratio via ``-1``).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if the ffmpeg executable cannot be found.
    """
    if not osp.exists(video_fp):
        print(f'video_fp: {video_fp} not exists!')
        return

    d = osp.split(video_fp)[0]
    fn = prefix(osp.basename(video_fp))
    palette_wfp = osp.join(d, 'palette.png')
    gif_wfp = osp.join(d, f'{fn}.gif')
    scaling = f'fps={fps},scale={size}:-1:flags=lanczos'

    # Arguments are passed as a list (no shell) so paths containing spaces,
    # parentheses or other shell metacharacters are handled correctly.
    run_opts = dict(check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # generate the palette
    subprocess.run(
        ['ffmpeg', '-y', '-i', str(video_fp), '-vf', f'{scaling},palettegen', palette_wfp],
        **run_opts
    )
    # use the palette to generate the gif
    subprocess.run(
        ['ffmpeg', '-y', '-i', str(video_fp), '-i', palette_wfp,
         '-filter_complex', f'{scaling}[x];[x][1:v]paletteuse', gif_wfp],
        **run_opts
    )
+
+
def merge_audio_video(video_fp, audio_fp, wfp):
    """Mux ``video_fp`` and ``audio_fp`` into ``wfp`` with ffmpeg.

    The video stream is copied (``-c:v copy``) and the audio is encoded as AAC
    (``-c:a aac``). If either input does not exist, a message is printed and
    nothing is written.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if the ffmpeg executable cannot be found.
    """
    if not (osp.exists(video_fp) and osp.exists(audio_fp)):
        print(f'video_fp: {video_fp} or audio_fp: {audio_fp} not exists!')
        return

    # Pass an argument list (no shell) so paths with spaces or shell
    # metacharacters do not break or alter the command.
    cmd = [
        'ffmpeg', '-y',
        '-i', str(video_fp),
        '-i', str(audio_fp),
        '-c:v', 'copy',
        '-c:a', 'aac',
        str(wfp),
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(f'merge {video_fp} and {audio_fp} to {wfp}')
+
+
def blend(img: np.ndarray, mask: np.ndarray, background_color=(255, 255, 255)):
    """Composite ``img`` over a solid background colour, weighted by ``mask``.

    Args:
        img: image array; the background colour is reshaped to (1, 1, 3), so a
            trailing channel axis compatible with 3 is expected.
        mask: weights on a 0-255 scale (divided by 255 before use): 255 keeps
            ``img``, 0 shows the background. It may broadcast against ``img``
            directly or, for a 3-D ``img``, be a 2-D (H, W) map.
        background_color: three channel values of the background.

    Returns:
        The blended image, clipped to [0, 255] and cast to ``uint8``.
    """
    mask_float = mask.astype(np.float32) / 255.
    if img.ndim == 3 and mask_float.ndim == 2:
        # Give a single-channel mask a channel axis so it broadcasts over colours.
        mask_float = mask_float[..., np.newaxis]
    # Broadcasting the (1, 1, 3) colour avoids allocating a full-size background.
    bg = np.array(background_color).reshape([1, 1, 3])
    blended = mask_float * img + (1 - mask_float) * bg
    return np.clip(blended, 0, 255).astype(np.uint8)
+
+
def concat_frames(driving_image_lst, source_image, I_p_lst):
    """Build side-by-side comparison frames.

    Each output frame is ``[source | result]`` or, when driving frames are
    given, ``[driving | source | result]``, stacked horizontally. The source
    and driving images are resized to the height/width of the first result
    frame.

    Args:
        driving_image_lst: list of driving frames indexed in step with
            ``I_p_lst``, or None to omit the driving column.
        source_image: the source image, shown in every output frame.
        I_p_lst: list of result frames.

    Returns:
        A list with one concatenated frame per entry of ``I_p_lst`` (empty if
        ``I_p_lst`` is empty).
    """
    # TODO: add more concat style, e.g., left-down corner driving
    if len(I_p_lst) == 0:
        return []

    h, w, _ = I_p_lst[0].shape
    # The source image and target size never change, so resize only once.
    source_image_resized = cv2.resize(source_image, (w, h))

    out_lst = []
    for idx, I_p in track(enumerate(I_p_lst), total=len(I_p_lst), description='Concatenating result...'):
        if driving_image_lst is None:
            out = np.hstack((source_image_resized, I_p))
        else:
            driving_image_resized = cv2.resize(driving_image_lst[idx], (w, h))
            out = np.hstack((driving_image_resized, source_image_resized, I_p))

        out_lst.append(out)
    return out_lst
+
+
class VideoWriter:
    """Incremental video writer backed by ``imageio.get_writer``.

    Keyword arguments (all optional): fps (default 30), wfp (output path,
    default 'video.mp4'), format (default 'mp4'), codec (default 'libx264'),
    quality (default None), pixelformat (default 'yuv420p'), image_mode
    ('rgb' by default; 'bgr' reverses the last axis of each frame before
    writing), ffmpeg_params (default None).

    Can be used as a context manager so the file is closed even on error.
    """

    def __init__(self, **kwargs):
        self.fps = kwargs.get('fps', 30)
        self.wfp = kwargs.get('wfp', 'video.mp4')
        self.video_format = kwargs.get('format', 'mp4')
        self.codec = kwargs.get('codec', 'libx264')
        self.quality = kwargs.get('quality')
        self.pixelformat = kwargs.get('pixelformat', 'yuv420p')
        self.image_mode = kwargs.get('image_mode', 'rgb')
        self.ffmpeg_params = kwargs.get('ffmpeg_params')

        self.writer = imageio.get_writer(
            self.wfp, fps=self.fps, format=self.video_format,
            codec=self.codec, quality=self.quality,
            ffmpeg_params=self.ffmpeg_params, pixelformat=self.pixelformat
        )

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Close the file but let any exception from the with-body propagate.
        self.close()

    def write(self, image):
        """Append one frame, reversing the last axis first when image_mode is 'bgr'."""
        if self.image_mode.lower() == 'bgr':
            self.writer.append_data(image[..., ::-1])
        else:
            self.writer.append_data(image)

    def close(self):
        """Close the underlying writer; calling it again is a no-op."""
        if self.writer is not None:
            self.writer.close()
            # Drop the handle so the guard above makes repeated close() safe.
            self.writer = None
+
+
def change_video_fps(input_file, output_file, fps=20, codec='libx264', crf=5):
    """Re-encode ``input_file`` to ``output_file`` at a new frame rate with ffmpeg.

    Args:
        input_file: path of the source video.
        output_file: path of the re-encoded video (overwritten if present).
        fps: output frame rate (ffmpeg ``-r``).
        codec: video codec (ffmpeg ``-c:v``).
        crf: constant rate factor (ffmpeg ``-crf``).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if the ffmpeg executable cannot be found.
    """
    # Pass an argument list (no shell) so paths with spaces or shell
    # metacharacters do not break or alter the command.
    cmd = [
        'ffmpeg', '-y',
        '-i', str(input_file),
        '-c:v', str(codec),
        '-crf', str(crf),
        '-r', str(fps),
        str(output_file),
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+
def get_fps(filepath, default_fps=25):
    """Return the frame rate OpenCV reports for ``filepath``.

    Args:
        filepath: path of the video to inspect.
        default_fps: value returned when the reported rate is 0/None or when
            reading it raises an exception (the exception is printed).

    Returns:
        The reported frames-per-second, or ``default_fps`` as a fallback.
    """
    try:
        capture = cv2.VideoCapture(filepath)
        try:
            fps = capture.get(cv2.CAP_PROP_FPS)
        finally:
            # Release the handle explicitly instead of relying on garbage collection.
            capture.release()

        if fps in (0, None):
            fps = default_fps
    except Exception as e:
        print(e)
        fps = default_fps

    return fps
+
+
def has_audio_stream(video_path: str) -> bool:
    """
    Tell whether ffprobe finds at least one audio stream in a file.

    A directory path is reported as having no audio without running ffprobe.
    If ffprobe exits with a non-zero status, its stderr is logged and False is
    returned.

    :param video_path: Path to the video file
    :return: True if the video contains an audio stream, False otherwise
    """
    if osp.isdir(video_path):
        return False

    # List only the audio streams ('-select_streams a') and print one bare
    # codec_type value per stream.
    probe = subprocess.run(
        [
            'ffprobe',
            '-v', 'error',
            '-select_streams', 'a',
            '-show_entries', 'stream=codec_type',
            '-of', 'default=noprint_wrappers=1:nokey=1',
            video_path,
        ],
        capture_output=True,
        text=True,
    )

    if probe.returncode == 0:
        # Any non-blank output means at least one audio stream was listed.
        return probe.stdout.strip() != ''

    log(f"Error occurred while probing video: {probe.stderr}")
    return False
+
+
def add_audio_to_video(silent_video_path: str, audio_video_path: str, output_video_path: str):
    """Combine the video of one file with the audio of another using ffmpeg.

    The video stream of ``silent_video_path`` (``-map 0:v``) is copied without
    re-encoding and paired with the audio of ``audio_video_path``
    (``-map 1:a``); ``-shortest`` ends the output with the shorter stream.
    Failures are logged rather than raised.

    Args:
        silent_video_path: file providing the video stream.
        audio_video_path: file providing the audio stream.
        output_video_path: file to write (overwritten if present).
    """
    cmd = [
        'ffmpeg',
        '-y',
        '-i', silent_video_path,
        '-i', audio_video_path,
        '-map', '0:v',
        '-map', '1:a',
        '-c:v', 'copy',
        '-shortest',
        output_video_path
    ]

    try:
        # Run the argument list directly: joining it into a shell string would
        # break on paths containing spaces or shell metacharacters.
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable, which the shell used to
        # report as a non-zero exit status.
        log(f"Error occurred: {e}")
    else:
        log(f"Video with audio generated successfully: {output_video_path}")
+
+
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Boxes are indexed as ``(x1, y1, x2, y2)`` and treated as inclusive pixel
    ranges, hence the ``+ 1`` in every width/height.

    Args:
        boxA: first box.
        boxB: second box.

    Returns:
        The IoU as a float; 0.0 when the union area is not positive
        (degenerate boxes), instead of dividing by zero.
    """
    # Corners of the overlap rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp at 0 so disjoint boxes contribute no overlap.
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        return 0.0
    return interArea / unionArea
diff --git a/subprocess/LivePortrait/src/utils/viz.py b/subprocess/LivePortrait/src/utils/viz.py
new file mode 100644
index 0000000000000000000000000000000000000000..59443cbf207f3395bee241f63c7acb95b9402530
--- /dev/null
+++ b/subprocess/LivePortrait/src/utils/viz.py
@@ -0,0 +1,19 @@
+# coding: utf-8
+
+import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
+
+
def viz_lmk(img_, vps, **kwargs):
    """Visualize points: draw each point of ``vps`` as a circle on a copy of ``img_``.

    Optional keyword arguments: ``lineType`` (default ``cv2.LINE_8``),
    ``radius`` (default 1) and ``thickness`` (default 1). The input image is
    not modified; the annotated copy is returned.
    """
    line_type = kwargs.get("lineType", cv2.LINE_8)  # cv2.LINE_AA
    radius = kwargs.get("radius", 1)
    thickness = kwargs.get("thickness", 1)

    canvas = img_.copy()
    for point in vps:
        center = (int(point[0]), int(point[1]))
        cv2.circle(
            canvas,
            center,
            radius=radius,
            color=(0, 255, 0),
            thickness=thickness,
            lineType=line_type,
        )
    return canvas
diff --git a/subprocess/LivePortrait/uploads/d6.mp4 b/subprocess/LivePortrait/uploads/d6.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..44f351385cef843b21b03fab8c3b10e0c005ec5e
--- /dev/null
+++ b/subprocess/LivePortrait/uploads/d6.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00e3ea79bbf28cbdc4fbb67ec655d9a0fe876e880ec45af55ae481348d0c0fff
+size 1967790
diff --git a/subprocess/LivePortrait/uploads/image(3).png b/subprocess/LivePortrait/uploads/image(3).png
new file mode 100644
index 0000000000000000000000000000000000000000..bb0ee94c02976ada9d7c4857109d338bfa9b52d7
Binary files /dev/null and b/subprocess/LivePortrait/uploads/image(3).png differ
diff --git a/subprocess/LivePortrait/uploads/intro.mp4 b/subprocess/LivePortrait/uploads/intro.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..2d220c12b25a494a9d25ceb1d7f5e6f1413f8b6d
--- /dev/null
+++ b/subprocess/LivePortrait/uploads/intro.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a984facdcb2c83b29cb6bee2888fd65f1d624d2a234125c83368f381ab4a0ceb
+size 4368949
diff --git a/subprocess/port.db b/subprocess/port.db
new file mode 100644
index 0000000000000000000000000000000000000000..ae3ccc2bd090134988f23e7aa346ab7d5ff937fe
Binary files /dev/null and b/subprocess/port.db differ