Update videoretalking/third_part/GPEN/gpen_face_enhancer.py
Browse files
videoretalking/third_part/GPEN/gpen_face_enhancer.py
CHANGED
|
@@ -1,138 +1,138 @@
|
|
| 1 |
-
import cv2
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
######### face enhancement
|
| 5 |
-
from face_parse.face_parsing import FaceParse
|
| 6 |
-
from face_detect.retinaface_detection import RetinaFaceDetection
|
| 7 |
-
from face_parse.face_parsing import FaceParse
|
| 8 |
-
from face_model.face_gan import FaceGAN
|
| 9 |
-
# from sr_model.real_esrnet import RealESRNet
|
| 10 |
-
from align_faces import warp_and_crop_face, get_reference_facial_points
|
| 11 |
-
from utils.inference_utils import Laplacian_Pyramid_Blending_with_mask
|
| 12 |
-
|
| 13 |
-
class FaceEnhancement(object):
    """Detect faces, optionally restore them with a face GAN, and paste them back.

    The pipeline per image is: detect faces and 5-point landmarks, warp each
    face to a square ``size`` x ``size`` crop, optionally run the crop through
    ``FaceGAN``, build a face-region mask with ``FaceParse``, warp crop and
    mask back to image coordinates, and blend the result over ``ori_img``.
    """

    def __init__(self, base_dir='./', size=512, model=None, use_sr=True, sr_model=None, channel_multiplier=2, narrow=1, device='cuda'):
        """Build the detector, restoration GAN and face parser.

        Args:
            base_dir: Forwarded to the detector, GAN and parser constructors.
            size: Side length of the aligned square face crop.
            model: Forwarded to ``FaceGAN``.
            use_sr: Request super-resolution. No SR backend is created here
                (``self.srmodel`` is always ``None``), so ``process`` treats
                this flag as a no-op unless a backend is assigned afterwards.
            sr_model: Unused while the ``RealESRNet`` backend is commented out.
            channel_multiplier: Forwarded to ``FaceGAN``.
            narrow: Forwarded to ``FaceGAN``.
            device: Forwarded to the detector, GAN and parser constructors.
        """
        self.facedetector = RetinaFaceDetection(base_dir, device)
        self.facegan = FaceGAN(base_dir, size, model, channel_multiplier, narrow, device=device)
        # The super-resolution backend is disabled in this file:
        # self.srmodel = RealESRNet(base_dir, sr_model, device=device)
        self.srmodel = None
        self.faceparser = FaceParse(base_dir, device=device)
        self.use_sr = use_sr
        self.size = size
        # Detections whose score (5th box element) is below this are skipped.
        self.threshold = 0.9

        # Feathered rectangular mask for pasting restored faces back.
        # NOTE(review): not read by any method visible in this class (the only
        # reference in ``process`` is commented out) and fixed at 512x512
        # regardless of ``size``.
        self.mask = np.zeros((512, 512), np.float32)
        cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1, cv2.LINE_AA)
        self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
        self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)

        # 3x3 smoothing kernel (weights sum to 1) applied to small faces.
        self.kernel = np.array((
            [0.0625, 0.125, 0.0625],
            [0.125, 0.25, 0.125],
            [0.0625, 0.125, 0.0625]), dtype="float32")

        # Reference positions of the 5 landmarks in the crop settings.
        default_square = True
        inner_padding_factor = 0.25
        outer_padding = (0, 0)
        self.reference_5pts = get_reference_facial_points(
            (self.size, self.size), inner_padding_factor, outer_padding, default_square)

    def mask_postprocess(self, mask, thres=20):
        """Zero a ``thres``-pixel border of ``mask`` and feather it.

        The border is zeroed IN PLACE, so the caller's array is modified too;
        ``process`` relies on that for its sharp mask. The blurred result is a
        new array.

        Args:
            mask: 2-D float mask.
            thres: Border width in pixels; ``0`` leaves the border untouched.

        Returns:
            The twice-blurred mask as ``np.float32``.
        """
        # ``mask[-0:, :]`` selects the whole array, so a zero-width border
        # must be skipped explicitly or the entire mask would be wiped.
        if thres != 0:
            mask[:thres, :] = 0
            mask[-thres:, :] = 0
            mask[:, :thres] = 0
            mask[:, -thres:] = 0
        mask = cv2.GaussianBlur(mask, (101, 101), 11)
        mask = cv2.GaussianBlur(mask, (101, 101), 11)
        return mask.astype(np.float32)

    def process(self, img, ori_img, bbox=None, face_enhance=True, possion_blending=False):
        """Enhance every confidently detected face in ``img`` and blend onto ``ori_img``.

        Args:
            img: Image the detector and face warps operate on.
            ori_img: Background image used by the blending step.
                Presumably the same height/width as ``img`` - the blend
                arithmetic requires it; confirm against callers.
            bbox: Optional ``(y1, y2, x1, x2)`` region; only used by the
                pyramid-blending branch to restrict the sharp mask.
            face_enhance: Run each crop through the face GAN when true;
                otherwise paste the unmodified crop.
            possion_blending: (sic) Must be exactly ``True`` to select
                Laplacian-pyramid blending instead of plain alpha blending.

        Returns:
            ``(img, orig_faces, enhanced_faces)``: the blended image and the
            lists of aligned crops before and after enhancement.
        """
        # Only run super-resolution when a backend actually exists; the
        # constructor leaves ``srmodel`` as None, which used to crash here.
        img_sr = None
        if self.use_sr and self.srmodel is not None:
            img_sr = self.srmodel.process(img)
            if img_sr is not None:
                img = cv2.resize(img, img_sr.shape[:2][::-1])

        facebs, landms = self.facedetector.detect(img.copy())

        orig_faces, enhanced_faces = [], []
        height, width = img.shape[:2]
        full_mask = np.zeros((height, width), dtype=np.float32)
        full_img = np.zeros(ori_img.shape, dtype=np.uint8)
        # Start from an empty sharp mask so that an image without any accepted
        # face blends to the untouched background instead of raising.
        mask_sharp = np.zeros((height, width), dtype=np.float32)

        # Parsing label -> mask value lookup (index = label id):
        #   0 background  1 skin    2 nose    3 eye_g   4 l_eye   5 r_eye
        #   6 l_brow      7 r_brow  8 l_ear   9 r_ear  10 mouth  11 u_lip
        #  12 l_lip      13 hair   14 hat    15 ear_r  16 neck_l 17 neck
        #  18 cloth
        # No ear, no neck, no hair & hat: only the face region is kept.
        mm = [0, 255, 255, 255, 255, 255, 255, 255, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0]

        for faceb, facial5points in zip(facebs, landms):
            if faceb[4] < self.threshold:
                continue
            fh, fw = (faceb[3] - faceb[1]), (faceb[2] - faceb[0])

            facial5points = np.reshape(facial5points, (2, 5))

            of, tfm_inv = warp_and_crop_face(
                img, facial5points, reference_pts=self.reference_5pts,
                crop_size=(self.size, self.size))

            # Enhance the face (or pass the crop through unchanged).
            ef = self.facegan.process(of) if face_enhance else of

            orig_faces.append(of)
            enhanced_faces.append(ef)

            mask_sharp = self.faceparser.process(ef, mm)[0] / 255.
            # mask_postprocess also zeroes the border of mask_sharp in place.
            tmp_mask = self.mask_postprocess(mask_sharp)
            # cv2.resize takes (width, height), i.e. the reversed array shape.
            crop_wh = ef.shape[1::-1]
            tmp_mask = cv2.resize(tmp_mask, crop_wh)
            mask_sharp = cv2.resize(mask_sharp, crop_wh)

            # flags=3 is kept as in the original (numerically cv2.INTER_AREA
            # - TODO confirm this is the intended interpolation).
            tmp_mask = cv2.warpAffine(tmp_mask, tfm_inv, (width, height), flags=3)
            mask_sharp = cv2.warpAffine(mask_sharp, tfm_inv, (width, height), flags=3)

            if min(fh, fw) < 100:  # smoothing filter for small faces
                ef = cv2.filter2D(ef, -1, self.kernel)

            pasted = ef if face_enhance else of
            tmp_img = cv2.warpAffine(pasted, tfm_inv, (width, height), flags=3)

            # Where this face's feathered mask exceeds what is already
            # accumulated, take both the mask value and the pixels from it.
            update = (tmp_mask - full_mask) > 0
            full_mask[update] = tmp_mask[update]
            full_img[update] = tmp_img[update]

        # NOTE(review): placed after the loop, so only the last accepted
        # face's sharp mask is used below. The nesting was reconstructed from
        # an indentation-stripped listing - confirm against the real file.
        mask_sharp = cv2.GaussianBlur(mask_sharp, (0, 0), sigmaX=1, sigmaY=1, borderType=cv2.BORDER_DEFAULT)

        full_mask = full_mask[:, :, np.newaxis]
        mask_sharp = mask_sharp[:, :, np.newaxis]

        if img_sr is not None:
            img = cv2.convertScaleAbs(img_sr * (1 - full_mask) + full_img * full_mask)

        elif possion_blending is True:
            if bbox is not None:
                y1, y2, x1, x2 = bbox
                mask_bbox = np.zeros_like(mask_sharp)
                mask_bbox[y1:y2 - 5, x1:x2] = 1
                blend_mask = np.float32(mask_sharp * mask_bbox)
            else:
                blend_mask = full_mask
            # The pyramid blend is run at a fixed 512x512 working size.
            full_img, ori_img, full_mask = [
                cv2.resize(x, (512, 512)) for x in (full_img, ori_img, blend_mask)]

            img = Laplacian_Pyramid_Blending_with_mask(full_img, ori_img, full_mask, 6)
            img = np.clip(img, 0, 255)
            img = np.uint8(cv2.resize(img, (width, height)))

        else:
            # Two-stage alpha blend: feathered mask first, then sharp mask.
            img = cv2.convertScaleAbs(ori_img * (1 - full_mask) + full_img * full_mask)
            img = cv2.convertScaleAbs(ori_img * (1 - mask_sharp) + img * mask_sharp)

        return img, orig_faces, enhanced_faces
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
######### face enhancement
|
| 5 |
+
from videoretalking.third_part.GPEN.face_parse.face_parsing import FaceParse
|
| 6 |
+
from videoretalking.third_part.GPEN.face_detect.retinaface_detection import RetinaFaceDetection
|
| 7 |
+
from videoretalking.third_part.GPEN.face_parse.face_parsing import FaceParse
|
| 8 |
+
from videoretalking.third_part.GPEN.face_model.face_gan import FaceGAN
|
| 9 |
+
# from sr_model.real_esrnet import RealESRNet
|
| 10 |
+
from align_faces import warp_and_crop_face, get_reference_facial_points
|
| 11 |
+
from utils.inference_utils import Laplacian_Pyramid_Blending_with_mask
|
| 12 |
+
|
| 13 |
+
class FaceEnhancement(object):
    """Detect faces, optionally restore them with a face GAN, and paste them back.

    The pipeline per image is: detect faces and 5-point landmarks, warp each
    face to a square ``size`` x ``size`` crop, optionally run the crop through
    ``FaceGAN``, build a face-region mask with ``FaceParse``, warp crop and
    mask back to image coordinates, and blend the result over ``ori_img``.
    """

    def __init__(self, base_dir='./', size=512, model=None, use_sr=True, sr_model=None, channel_multiplier=2, narrow=1, device='cuda'):
        """Build the detector, restoration GAN and face parser.

        Args:
            base_dir: Forwarded to the detector, GAN and parser constructors.
            size: Side length of the aligned square face crop.
            model: Forwarded to ``FaceGAN``.
            use_sr: Request super-resolution. No SR backend is created here
                (``self.srmodel`` is always ``None``), so ``process`` treats
                this flag as a no-op unless a backend is assigned afterwards.
            sr_model: Unused while the ``RealESRNet`` backend is commented out.
            channel_multiplier: Forwarded to ``FaceGAN``.
            narrow: Forwarded to ``FaceGAN``.
            device: Forwarded to the detector, GAN and parser constructors.
        """
        self.facedetector = RetinaFaceDetection(base_dir, device)
        self.facegan = FaceGAN(base_dir, size, model, channel_multiplier, narrow, device=device)
        # The super-resolution backend is disabled in this file:
        # self.srmodel = RealESRNet(base_dir, sr_model, device=device)
        self.srmodel = None
        self.faceparser = FaceParse(base_dir, device=device)
        self.use_sr = use_sr
        self.size = size
        # Detections whose score (5th box element) is below this are skipped.
        self.threshold = 0.9

        # Feathered rectangular mask for pasting restored faces back.
        # NOTE(review): not read by any method visible in this class (the only
        # reference in ``process`` is commented out) and fixed at 512x512
        # regardless of ``size``.
        self.mask = np.zeros((512, 512), np.float32)
        cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1, cv2.LINE_AA)
        self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
        self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)

        # 3x3 smoothing kernel (weights sum to 1) applied to small faces.
        self.kernel = np.array((
            [0.0625, 0.125, 0.0625],
            [0.125, 0.25, 0.125],
            [0.0625, 0.125, 0.0625]), dtype="float32")

        # Reference positions of the 5 landmarks in the crop settings.
        default_square = True
        inner_padding_factor = 0.25
        outer_padding = (0, 0)
        self.reference_5pts = get_reference_facial_points(
            (self.size, self.size), inner_padding_factor, outer_padding, default_square)

    def mask_postprocess(self, mask, thres=20):
        """Zero a ``thres``-pixel border of ``mask`` and feather it.

        The border is zeroed IN PLACE, so the caller's array is modified too;
        ``process`` relies on that for its sharp mask. The blurred result is a
        new array.

        Args:
            mask: 2-D float mask.
            thres: Border width in pixels; ``0`` leaves the border untouched.

        Returns:
            The twice-blurred mask as ``np.float32``.
        """
        # ``mask[-0:, :]`` selects the whole array, so a zero-width border
        # must be skipped explicitly or the entire mask would be wiped.
        if thres != 0:
            mask[:thres, :] = 0
            mask[-thres:, :] = 0
            mask[:, :thres] = 0
            mask[:, -thres:] = 0
        mask = cv2.GaussianBlur(mask, (101, 101), 11)
        mask = cv2.GaussianBlur(mask, (101, 101), 11)
        return mask.astype(np.float32)

    def process(self, img, ori_img, bbox=None, face_enhance=True, possion_blending=False):
        """Enhance every confidently detected face in ``img`` and blend onto ``ori_img``.

        Args:
            img: Image the detector and face warps operate on.
            ori_img: Background image used by the blending step.
                Presumably the same height/width as ``img`` - the blend
                arithmetic requires it; confirm against callers.
            bbox: Optional ``(y1, y2, x1, x2)`` region; only used by the
                pyramid-blending branch to restrict the sharp mask.
            face_enhance: Run each crop through the face GAN when true;
                otherwise paste the unmodified crop.
            possion_blending: (sic) Must be exactly ``True`` to select
                Laplacian-pyramid blending instead of plain alpha blending.

        Returns:
            ``(img, orig_faces, enhanced_faces)``: the blended image and the
            lists of aligned crops before and after enhancement.
        """
        # Only run super-resolution when a backend actually exists; the
        # constructor leaves ``srmodel`` as None, which used to crash here.
        img_sr = None
        if self.use_sr and self.srmodel is not None:
            img_sr = self.srmodel.process(img)
            if img_sr is not None:
                img = cv2.resize(img, img_sr.shape[:2][::-1])

        facebs, landms = self.facedetector.detect(img.copy())

        orig_faces, enhanced_faces = [], []
        height, width = img.shape[:2]
        full_mask = np.zeros((height, width), dtype=np.float32)
        full_img = np.zeros(ori_img.shape, dtype=np.uint8)
        # Start from an empty sharp mask so that an image without any accepted
        # face blends to the untouched background instead of raising.
        mask_sharp = np.zeros((height, width), dtype=np.float32)

        # Parsing label -> mask value lookup (index = label id):
        #   0 background  1 skin    2 nose    3 eye_g   4 l_eye   5 r_eye
        #   6 l_brow      7 r_brow  8 l_ear   9 r_ear  10 mouth  11 u_lip
        #  12 l_lip      13 hair   14 hat    15 ear_r  16 neck_l 17 neck
        #  18 cloth
        # No ear, no neck, no hair & hat: only the face region is kept.
        mm = [0, 255, 255, 255, 255, 255, 255, 255, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0]

        for faceb, facial5points in zip(facebs, landms):
            if faceb[4] < self.threshold:
                continue
            fh, fw = (faceb[3] - faceb[1]), (faceb[2] - faceb[0])

            facial5points = np.reshape(facial5points, (2, 5))

            of, tfm_inv = warp_and_crop_face(
                img, facial5points, reference_pts=self.reference_5pts,
                crop_size=(self.size, self.size))

            # Enhance the face (or pass the crop through unchanged).
            ef = self.facegan.process(of) if face_enhance else of

            orig_faces.append(of)
            enhanced_faces.append(ef)

            mask_sharp = self.faceparser.process(ef, mm)[0] / 255.
            # mask_postprocess also zeroes the border of mask_sharp in place.
            tmp_mask = self.mask_postprocess(mask_sharp)
            # cv2.resize takes (width, height), i.e. the reversed array shape.
            crop_wh = ef.shape[1::-1]
            tmp_mask = cv2.resize(tmp_mask, crop_wh)
            mask_sharp = cv2.resize(mask_sharp, crop_wh)

            # flags=3 is kept as in the original (numerically cv2.INTER_AREA
            # - TODO confirm this is the intended interpolation).
            tmp_mask = cv2.warpAffine(tmp_mask, tfm_inv, (width, height), flags=3)
            mask_sharp = cv2.warpAffine(mask_sharp, tfm_inv, (width, height), flags=3)

            if min(fh, fw) < 100:  # smoothing filter for small faces
                ef = cv2.filter2D(ef, -1, self.kernel)

            pasted = ef if face_enhance else of
            tmp_img = cv2.warpAffine(pasted, tfm_inv, (width, height), flags=3)

            # Where this face's feathered mask exceeds what is already
            # accumulated, take both the mask value and the pixels from it.
            update = (tmp_mask - full_mask) > 0
            full_mask[update] = tmp_mask[update]
            full_img[update] = tmp_img[update]

        # NOTE(review): placed after the loop, so only the last accepted
        # face's sharp mask is used below. The nesting was reconstructed from
        # an indentation-stripped listing - confirm against the real file.
        mask_sharp = cv2.GaussianBlur(mask_sharp, (0, 0), sigmaX=1, sigmaY=1, borderType=cv2.BORDER_DEFAULT)

        full_mask = full_mask[:, :, np.newaxis]
        mask_sharp = mask_sharp[:, :, np.newaxis]

        if img_sr is not None:
            img = cv2.convertScaleAbs(img_sr * (1 - full_mask) + full_img * full_mask)

        elif possion_blending is True:
            if bbox is not None:
                y1, y2, x1, x2 = bbox
                mask_bbox = np.zeros_like(mask_sharp)
                mask_bbox[y1:y2 - 5, x1:x2] = 1
                blend_mask = np.float32(mask_sharp * mask_bbox)
            else:
                blend_mask = full_mask
            # The pyramid blend is run at a fixed 512x512 working size.
            full_img, ori_img, full_mask = [
                cv2.resize(x, (512, 512)) for x in (full_img, ori_img, blend_mask)]

            img = Laplacian_Pyramid_Blending_with_mask(full_img, ori_img, full_mask, 6)
            img = np.clip(img, 0, 255)
            img = np.uint8(cv2.resize(img, (width, height)))

        else:
            # Two-stage alpha blend: feathered mask first, then sharp mask.
            img = cv2.convertScaleAbs(ori_img * (1 - full_mask) + full_img * full_mask)
            img = cv2.convertScaleAbs(ori_img * (1 - mask_sharp) + img * mask_sharp)

        return img, orig_faces, enhanced_faces
|