Spaces:

sczhou
/

CodeFormer

Running on Zero

App Files Community

106

faceplus

#38

by vincekey - opened Jul 20, 2023

base: refs/heads/main

←

from: refs/pr/38

Discussion Files changed

+110

-389

Files changed (8) hide show

.gitignore → CodeFormer/.gitignore +4 -5
CodeFormer/basicsr/utils/misc.py +2 -25
CodeFormer/basicsr/version.py +2 -2
CodeFormer/facelib/utils/face_restoration_helper.py +12 -77
CodeFormer/facelib/utils/misc.py +4 -32
CodeFormer/inference_codeformer.py +41 -126
README.md +2 -2
app.py +43 -120

.gitignore → CodeFormer/.gitignore RENAMED Viewed

@@ -5,9 +5,9 @@ version.py
 # ignored files with suffix
 *.html
-*.png
-*.jpeg
-*.jpg
 *.pt
 *.gif
 *.pth
@@ -122,8 +122,7 @@ venv.bak/
 .mypy_cache/
 # project
-CodeFormer/results/
-output/
 dlib/
 *.pth
 *_old*

 # ignored files with suffix
 *.html
+# *.png
+# *.jpeg
+# *.jpg
 *.pt
 *.gif
 *.pth
 .mypy_cache/
 # project
+results/
 dlib/
 *.pth
 *_old*

CodeFormer/basicsr/utils/misc.py CHANGED Viewed

@@ -1,36 +1,13 @@
 import os
-import re
 import random
 import time
 import torch
-import numpy as np
 from os import path as osp
 from .dist_util import master_only
 from .logger import get_root_logger
-IS_HIGH_VERSION = [int(m) for m in list(re.findall(r"^([0-9]+)\.([0-9]+)\.([0-9]+)([^0-9][a-zA-Z0-9]*)?(\+git.*)?$",\
-    torch.__version__)[0][:3])] >= [1, 12, 0]
-def gpu_is_available():
-    if IS_HIGH_VERSION:
-        if torch.backends.mps.is_available():
-            return True
-    return True if torch.cuda.is_available() and torch.backends.cudnn.is_available() else False
-def get_device(gpu_id=None):
-    if gpu_id is None:
-        gpu_str = ''
-    elif isinstance(gpu_id, int):
-        gpu_str = f':{gpu_id}'
-    else:
-        raise TypeError('Input should be int value.')
-    if IS_HIGH_VERSION:
-        if torch.backends.mps.is_available():
-            return torch.device('mps'+gpu_str)
-    return torch.device('cuda'+gpu_str if torch.cuda.is_available() and torch.backends.cudnn.is_available() else 'cpu')
 def set_random_seed(seed):
     """Set random seeds."""
@@ -154,4 +131,4 @@ def sizeof_fmt(size, suffix='B'):
         if abs(size) < 1024.0:
             return f'{size:3.1f} {unit}{suffix}'
         size /= 1024.0
-    return f'{size:3.1f} Y{suffix}'

+import numpy as np
 import os
 import random
 import time
 import torch
 from os import path as osp
 from .dist_util import master_only
 from .logger import get_root_logger
 def set_random_seed(seed):
     """Set random seeds."""
         if abs(size) < 1024.0:
             return f'{size:3.1f} {unit}{suffix}'
         size /= 1024.0
+    return f'{size:3.1f} Y{suffix}'

CodeFormer/basicsr/version.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # GENERATED VERSION FILE
-# TIME: Sat Sep 21 15:31:46 2024
 __version__ = '1.3.2'
-__gitsha__ = '1.3.2'
 version_info = (1, 3, 2)

 # GENERATED VERSION FILE
+# TIME: Sun Aug  7 15:14:26 2022
 __version__ = '1.3.2'
+__gitsha__ = '6f94023'
 version_info = (1, 3, 2)

CodeFormer/facelib/utils/face_restoration_helper.py CHANGED Viewed

@@ -6,14 +6,8 @@ from torchvision.transforms.functional import normalize
 from facelib.detection import init_detection_model
 from facelib.parsing import init_parsing_model
-from facelib.utils.misc import img2tensor, imwrite, is_gray, bgr2gray, adain_npy
-from basicsr.utils.download_util import load_file_from_url
-from basicsr.utils.misc import get_device
-dlib_model_url = {
-    'face_detector': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/mmod_human_face_detector-4cb19393.dat',
-    'shape_predictor_5': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/shape_predictor_5_face_landmarks-c4b1e980.dat'
-}
 def get_largest_face(det_faces, h, w):
@@ -70,15 +64,8 @@ class FaceRestoreHelper(object):
         self.crop_ratio = crop_ratio  # (h, w)
         assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ration only supports >=1'
         self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
-        self.det_model = det_model
-        if self.det_model == 'dlib':
-            # standard 5 landmarks for FFHQ faces with 1024 x 1024
-            self.face_template = np.array([[686.77227723, 488.62376238], [586.77227723, 493.59405941],
-                                        [337.91089109, 488.38613861], [437.95049505, 493.51485149],
-                                        [513.58415842, 678.5049505]])
-            self.face_template = self.face_template / (1024 // face_size)
-        elif self.template_3points:
             self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
         else:
             # standard 5 landmarks for FFHQ faces with 512 x 512
@@ -90,6 +77,7 @@ class FaceRestoreHelper(object):
             # self.face_template = np.array([[193.65928, 242.98541], [318.32558, 243.06108], [255.67984, 328.82894],
             #                                 [198.22603, 372.82502], [313.91018, 372.75659]])
         self.face_template = self.face_template * (face_size / 512.0)
         if self.crop_ratio[0] > 1:
             self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
@@ -109,16 +97,12 @@ class FaceRestoreHelper(object):
         self.pad_input_imgs = []
         if device is None:
-            # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-            self.device = get_device()
         else:
             self.device = device
         # init face detection model
-        if self.det_model == 'dlib':
-            self.face_detector, self.shape_predictor_5 = self.init_dlib(dlib_model_url['face_detector'], dlib_model_url['shape_predictor_5'])
-        else:
-            self.face_detector = init_detection_model(det_model, half=False, device=self.device)
         # init face parsing model
         self.use_parse = use_parse
@@ -141,7 +125,7 @@ class FaceRestoreHelper(object):
             img = img[:, :, 0:3]
         self.input_img = img
-        self.is_gray = is_gray(img, threshold=10)
         if self.is_gray:
             print('Grayscale input: True')
@@ -149,72 +133,25 @@ class FaceRestoreHelper(object):
             f = 512.0/min(self.input_img.shape[:2])
             self.input_img = cv2.resize(self.input_img, (0,0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)
-    def init_dlib(self, detection_path, landmark5_path):
-        """Initialize the dlib detectors and predictors."""
-        try:
-            import dlib
-        except ImportError:
-            print('Please install dlib by running:' 'conda install -c conda-forge dlib')
-        detection_path = load_file_from_url(url=detection_path, model_dir='weights/dlib', progress=True, file_name=None)
-        landmark5_path = load_file_from_url(url=landmark5_path, model_dir='weights/dlib', progress=True, file_name=None)
-        face_detector = dlib.cnn_face_detection_model_v1(detection_path)
-        shape_predictor_5 = dlib.shape_predictor(landmark5_path)
-        return face_detector, shape_predictor_5
-    def get_face_landmarks_5_dlib(self,
-                                only_keep_largest=False,
-                                scale=1):
-        det_faces = self.face_detector(self.input_img, scale)
-        if len(det_faces) == 0:
-            print('No face detected. Try to increase upsample_num_times.')
-            return 0
-        else:
-            if only_keep_largest:
-                print('Detect several faces and only keep the largest.')
-                face_areas = []
-                for i in range(len(det_faces)):
-                    face_area = (det_faces[i].rect.right() - det_faces[i].rect.left()) * (
-                        det_faces[i].rect.bottom() - det_faces[i].rect.top())
-                    face_areas.append(face_area)
-                largest_idx = face_areas.index(max(face_areas))
-                self.det_faces = [det_faces[largest_idx]]
-            else:
-                self.det_faces = det_faces
-        if len(self.det_faces) == 0:
-            return 0
-        for face in self.det_faces:
-            shape = self.shape_predictor_5(self.input_img, face.rect)
-            landmark = np.array([[part.x, part.y] for part in shape.parts()])
-            self.all_landmarks_5.append(landmark)
-        return len(self.all_landmarks_5)
     def get_face_landmarks_5(self,
                              only_keep_largest=False,
                              only_center_face=False,
                              resize=None,
                              blur_ratio=0.01,
                              eye_dist_threshold=None):
-        if self.det_model == 'dlib':
-            return self.get_face_landmarks_5_dlib(only_keep_largest)
         if resize is None:
             scale = 1
             input_img = self.input_img
         else:
             h, w = self.input_img.shape[0:2]
             scale = resize / min(h, w)
-            # scale = max(1, scale) # always scale up; comment this out for HD images, e.g., AIGC faces.
             h, w = int(h * scale), int(w * scale)
             interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
             input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)
         with torch.no_grad():
-            bboxes = self.face_detector.detect_faces(input_img)
         if bboxes is None or bboxes.shape[0] == 0:
             return 0
@@ -361,12 +298,10 @@ class FaceRestoreHelper(object):
                 torch.save(inverse_affine, save_path)
-    def add_restored_face(self, restored_face, input_face=None):
         if self.is_gray:
-            restored_face = bgr2gray(restored_face) # convert img into grayscale
-            if input_face is not None:
-                restored_face = adain_npy(restored_face, input_face) # transfer the color
-        self.restored_faces.append(restored_face)
     def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):

 from facelib.detection import init_detection_model
 from facelib.parsing import init_parsing_model
+from facelib.utils.misc import img2tensor, imwrite, is_gray, bgr2gray
 def get_largest_face(det_faces, h, w):
         self.crop_ratio = crop_ratio  # (h, w)
         assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ration only supports >=1'
         self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
+        if self.template_3points:
             self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
         else:
             # standard 5 landmarks for FFHQ faces with 512 x 512
             # self.face_template = np.array([[193.65928, 242.98541], [318.32558, 243.06108], [255.67984, 328.82894],
             #                                 [198.22603, 372.82502], [313.91018, 372.75659]])
         self.face_template = self.face_template * (face_size / 512.0)
         if self.crop_ratio[0] > 1:
             self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
         self.pad_input_imgs = []
         if device is None:
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         else:
             self.device = device
         # init face detection model
+        self.face_det = init_detection_model(det_model, half=False, device=self.device)
         # init face parsing model
         self.use_parse = use_parse
             img = img[:, :, 0:3]
         self.input_img = img
+        self.is_gray = is_gray(img, threshold=5)
         if self.is_gray:
             print('Grayscale input: True')
             f = 512.0/min(self.input_img.shape[:2])
             self.input_img = cv2.resize(self.input_img, (0,0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)
     def get_face_landmarks_5(self,
                              only_keep_largest=False,
                              only_center_face=False,
                              resize=None,
                              blur_ratio=0.01,
                              eye_dist_threshold=None):
         if resize is None:
             scale = 1
             input_img = self.input_img
         else:
             h, w = self.input_img.shape[0:2]
             scale = resize / min(h, w)
+            scale = max(1, scale) # always scale up
             h, w = int(h * scale), int(w * scale)
             interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
             input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)
         with torch.no_grad():
+            bboxes = self.face_det.detect_faces(input_img)
         if bboxes is None or bboxes.shape[0] == 0:
             return 0
                 torch.save(inverse_affine, save_path)
+    def add_restored_face(self, face):
         if self.is_gray:
+            face = bgr2gray(face) # convert img into grayscale
+        self.restored_faces.append(face)
     def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):

CodeFormer/facelib/utils/misc.py CHANGED Viewed

@@ -7,13 +7,13 @@ import torch
 from torch.hub import download_url_to_file, get_dir
 from urllib.parse import urlparse
 # from basicsr.utils.download_util import download_file_from_google_drive
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 def download_pretrained_models(file_ids, save_path_root):
-    import gdown
     os.makedirs(save_path_root, exist_ok=True)
     for file_name, file_id in file_ids.items():
@@ -23,7 +23,7 @@ def download_pretrained_models(file_ids, save_path_root):
             user_response = input(f'{file_name} already exist. Do you want to cover it? Y/N\n')
             if user_response.lower() == 'y':
                 print(f'Covering {file_name} to {save_path}')
-                gdown.download(file_url, save_path, quiet=False)
                 # download_file_from_google_drive(file_id, save_path)
             elif user_response.lower() == 'n':
                 print(f'Skipping {file_name}')
@@ -31,7 +31,7 @@ def download_pretrained_models(file_ids, save_path_root):
                 raise ValueError('Wrong input. Only accepts Y/N.')
         else:
             print(f'Downloading {file_name} to {save_path}')
-            gdown.download(file_url, save_path, quiet=False)
             # download_file_from_google_drive(file_id, save_path)
@@ -172,31 +172,3 @@ def bgr2gray(img, out_channel=3):
     if out_channel == 3:
         gray = gray[:,:,np.newaxis].repeat(3, axis=2)
     return gray
-def calc_mean_std(feat, eps=1e-5):
-    """
-    Args:
-        feat (numpy): 3D [w h c]s
-    """
-    size = feat.shape
-    assert len(size) == 3, 'The input feature should be 3D tensor.'
-    c = size[2]
-    feat_var = feat.reshape(-1, c).var(axis=0) + eps
-    feat_std = np.sqrt(feat_var).reshape(1, 1, c)
-    feat_mean = feat.reshape(-1, c).mean(axis=0).reshape(1, 1, c)
-    return feat_mean, feat_std
-def adain_npy(content_feat, style_feat):
-    """Adaptive instance normalization for numpy.
-    Args:
-        content_feat (numpy): The input feature.
-        style_feat (numpy): The reference feature.
-    """
-    size = content_feat.shape
-    style_mean, style_std = calc_mean_std(style_feat)
-    content_mean, content_std = calc_mean_std(content_feat)
-    normalized_feat = (content_feat - np.broadcast_to(content_mean, size)) / np.broadcast_to(content_std, size)
-    return normalized_feat * np.broadcast_to(style_std, size) + np.broadcast_to(style_mean, size)

 from torch.hub import download_url_to_file, get_dir
 from urllib.parse import urlparse
 # from basicsr.utils.download_util import download_file_from_google_drive
+# import gdown
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 def download_pretrained_models(file_ids, save_path_root):
     os.makedirs(save_path_root, exist_ok=True)
     for file_name, file_id in file_ids.items():
             user_response = input(f'{file_name} already exist. Do you want to cover it? Y/N\n')
             if user_response.lower() == 'y':
                 print(f'Covering {file_name} to {save_path}')
+                # gdown.download(file_url, save_path, quiet=False)
                 # download_file_from_google_drive(file_id, save_path)
             elif user_response.lower() == 'n':
                 print(f'Skipping {file_name}')
                 raise ValueError('Wrong input. Only accepts Y/N.')
         else:
             print(f'Downloading {file_name} to {save_path}')
+            # gdown.download(file_url, save_path, quiet=False)
             # download_file_from_google_drive(file_id, save_path)
     if out_channel == 3:
         gray = gray[:,:,np.newaxis].repeat(3, axis=2)
     return gray

CodeFormer/inference_codeformer.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import cv2
 import argparse
@@ -6,9 +7,8 @@ import torch
 from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
-from basicsr.utils.misc import gpu_is_available, get_device
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
-from facelib.utils.misc import is_gray
 from basicsr.utils.registry import ARCH_REGISTRY
@@ -17,104 +17,51 @@ pretrain_model_url = {
 }
 def set_realesrgan():
-    from basicsr.archs.rrdbnet_arch import RRDBNet
-    from basicsr.utils.realesrgan_utils import RealESRGANer
-    use_half = False
-    if torch.cuda.is_available(): # set False in CPU/MPS mode
-        no_half_gpu_list = ['1650', '1660'] # set False for GPUs that don't support f16
-        if not True in [gpu in torch.cuda.get_device_name(0) for gpu in no_half_gpu_list]:
-            use_half = True
-    model = RRDBNet(
-        num_in_ch=3,
-        num_out_ch=3,
-        num_feat=64,
-        num_block=23,
-        num_grow_ch=32,
-        scale=2,
-    )
-    upsampler = RealESRGANer(
-        scale=2,
-        model_path="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/RealESRGAN_x2plus.pth",
-        model=model,
-        tile=args.bg_tile,
-        tile_pad=40,
-        pre_pad=0,
-        half=use_half
-    )
-    if not gpu_is_available():  # CPU
         import warnings
-        warnings.warn('Running on CPU now! Make sure your PyTorch version matches your CUDA.'
-                        'The unoptimized RealESRGAN is slow on CPU. '
-                        'If you want to disable it, please remove `--bg_upsampler` and `--face_upsample` in command.',
                         category=RuntimeWarning)
-    return upsampler
 if __name__ == '__main__':
-    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    device = get_device()
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input_path', type=str, default='./inputs/whole_imgs',
-            help='Input image, video or folder. Default: inputs/whole_imgs')
-    parser.add_argument('-o', '--output_path', type=str, default=None,
-            help='Output folder. Default: results/<input_name>_<w>')
-    parser.add_argument('-w', '--fidelity_weight', type=float, default=0.5,
-            help='Balance the quality and fidelity. Default: 0.5')
-    parser.add_argument('-s', '--upscale', type=int, default=2,
-            help='The final upsampling scale of the image. Default: 2')
-    parser.add_argument('--has_aligned', action='store_true', help='Input are cropped and aligned faces. Default: False')
-    parser.add_argument('--only_center_face', action='store_true', help='Only restore the center face. Default: False')
-    parser.add_argument('--draw_box', action='store_true', help='Draw the bounding box for the detected faces. Default: False')
     # large det_model: 'YOLOv5l', 'retinaface_resnet50'
     # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
-    parser.add_argument('--detection_model', type=str, default='retinaface_resnet50',
-            help='Face detector. Optional: retinaface_resnet50, retinaface_mobile0.25, YOLOv5l, YOLOv5n, dlib. \
-                Default: retinaface_resnet50')
-    parser.add_argument('--bg_upsampler', type=str, default='None', help='Background upsampler. Optional: realesrgan')
-    parser.add_argument('--face_upsample', action='store_true', help='Face upsampler after enhancement. Default: False')
     parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
-    parser.add_argument('--suffix', type=str, default=None, help='Suffix of the restored faces. Default: None')
-    parser.add_argument('--save_video_fps', type=float, default=None, help='Frame rate for saving video. Default: None')
     args = parser.parse_args()
     # ------------------------ input & output ------------------------
-    w = args.fidelity_weight
-    input_video = False
-    if args.input_path.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')): # input single img path
-        input_img_list = [args.input_path]
-        result_root = f'results/test_img_{w}'
-    elif args.input_path.endswith(('mp4', 'mov', 'avi', 'MP4', 'MOV', 'AVI')): # input video path
-        from basicsr.utils.video_util import VideoReader, VideoWriter
-        input_img_list = []
-        vidreader = VideoReader(args.input_path)
-        image = vidreader.get_frame()
-        while image is not None:
-            input_img_list.append(image)
-            image = vidreader.get_frame()
-        audio = vidreader.get_audio()
-        fps = vidreader.get_fps() if args.save_video_fps is None else args.save_video_fps
-        video_name = os.path.basename(args.input_path)[:-4]
-        result_root = f'results/{video_name}_{w}'
-        input_video = True
-        vidreader.close()
-    else: # input img folder
-        if args.input_path.endswith('/'):  # solve when path ends with /
-            args.input_path = args.input_path[:-1]
-        # scan all the jpg and png images
-        input_img_list = sorted(glob.glob(os.path.join(args.input_path, '*.[jpJP][pnPN]*[gG]')))
-        result_root = f'results/{os.path.basename(args.input_path)}_{w}'
-    if not args.output_path is None: # set output path
-        result_root = args.output_path
-    test_img_num = len(input_img_list)
-    if test_img_num == 0:
-        raise FileNotFoundError('No input image/video is found...\n'
-            '\tNote that --input_path for video should end with .mp4|.mov|.avi')
     # ------------------ set up background upsampler ------------------
     if args.bg_upsampler == 'realesrgan':
@@ -162,27 +109,19 @@ if __name__ == '__main__':
         device=device)
     # -------------------- start to processing ---------------------
-    for i, img_path in enumerate(input_img_list):
         # clean all the intermediate results to process the next image
         face_helper.clean_all()
-        if isinstance(img_path, str):
-            img_name = os.path.basename(img_path)
-            basename, ext = os.path.splitext(img_name)
-            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
-            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
-        else: # for video processing
-            basename = str(i).zfill(6)
-            img_name = f'{video_name}_{basename}' if input_video else basename
-            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
-            img = img_path
         if args.has_aligned:
             # the input faces are already cropped and aligned
             img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
-            face_helper.is_gray = is_gray(img, threshold=10)
-            if face_helper.is_gray:
-                print('Grayscale input: True')
             face_helper.cropped_faces = [img]
         else:
             face_helper.read_image(img)
@@ -211,7 +150,7 @@ if __name__ == '__main__':
                 restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
             restored_face = restored_face.astype('uint8')
-            face_helper.add_restored_face(restored_face, cropped_face)
         # paste_back
         if not args.has_aligned:
@@ -239,36 +178,12 @@ if __name__ == '__main__':
                 save_face_name = f'{basename}.png'
             else:
                 save_face_name = f'{basename}_{idx:02d}.png'
-            if args.suffix is not None:
-                save_face_name = f'{save_face_name[:-4]}_{args.suffix}.png'
             save_restore_path = os.path.join(result_root, 'restored_faces', save_face_name)
             imwrite(restored_face, save_restore_path)
         # save restored img
         if not args.has_aligned and restored_img is not None:
-            if args.suffix is not None:
-                basename = f'{basename}_{args.suffix}'
             save_restore_path = os.path.join(result_root, 'final_results', f'{basename}.png')
             imwrite(restored_img, save_restore_path)
-    # save enhanced video
-    if input_video:
-        print('Video Saving...')
-        # load images
-        video_frames = []
-        img_list = sorted(glob.glob(os.path.join(result_root, 'final_results', '*.[jp][pn]g')))
-        for img_path in img_list:
-            img = cv2.imread(img_path)
-            video_frames.append(img)
-        # write images to video
-        height, width = video_frames[0].shape[:2]
-        if args.suffix is not None:
-            video_name = f'{video_name}_{args.suffix}.png'
-        save_restore_path = os.path.join(result_root, f'{video_name}.mp4')
-        vidwriter = VideoWriter(save_restore_path, height, width, fps, audio)
-        for f in video_frames:
-            vidwriter.write_frame(f)
-        vidwriter.close()
-    print(f'\nAll results are saved in {result_root}')

+# Modified by Shangchen Zhou from: https://github.com/TencentARC/GFPGAN/blob/master/inference_gfpgan.py
 import os
 import cv2
 import argparse
 from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
+import torch.nn.functional as F
 from basicsr.utils.registry import ARCH_REGISTRY
 }
 def set_realesrgan():
+    if not torch.cuda.is_available():  # CPU
         import warnings
+        warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. '
+                        'If you really want to use it, please modify the corresponding codes.',
                         category=RuntimeWarning)
+        bg_upsampler = None
+    else:
+        from basicsr.archs.rrdbnet_arch import RRDBNet
+        from basicsr.utils.realesrgan_utils import RealESRGANer
+        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+        bg_upsampler = RealESRGANer(
+            scale=2,
+            model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
+            model=model,
+            tile=args.bg_tile,
+            tile_pad=40,
+            pre_pad=0,
+            half=True)  # need to set False in CPU mode
+    return bg_upsampler
 if __name__ == '__main__':
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     parser = argparse.ArgumentParser()
+    parser.add_argument('--w', type=float, default=0.5, help='Balance the quality and fidelity')
+    parser.add_argument('--upscale', type=int, default=2, help='The final upsampling scale of the image. Default: 2')
+    parser.add_argument('--test_path', type=str, default='./inputs/cropped_faces')
+    parser.add_argument('--has_aligned', action='store_true', help='Input are cropped and aligned faces')
+    parser.add_argument('--only_center_face', action='store_true', help='Only restore the center face')
     # large det_model: 'YOLOv5l', 'retinaface_resnet50'
     # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
+    parser.add_argument('--detection_model', type=str, default='retinaface_resnet50')
+    parser.add_argument('--draw_box', action='store_true')
+    parser.add_argument('--bg_upsampler', type=str, default='None', help='background upsampler. Optional: realesrgan')
+    parser.add_argument('--face_upsample', action='store_true', help='face upsampler after enhancement.')
     parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
     args = parser.parse_args()
     # ------------------------ input & output ------------------------
+    if args.test_path.endswith('/'):  # solve when path ends with /
+        args.test_path = args.test_path[:-1]
+    w = args.w
+    result_root = f'results/{os.path.basename(args.test_path)}_{w}'
     # ------------------ set up background upsampler ------------------
     if args.bg_upsampler == 'realesrgan':
         device=device)
     # -------------------- start to processing ---------------------
+    # scan all the jpg and png images
+    for img_path in sorted(glob.glob(os.path.join(args.test_path, '*.[jp][pn]g'))):
         # clean all the intermediate results to process the next image
         face_helper.clean_all()
+        img_name = os.path.basename(img_path)
+        print(f'Processing: {img_name}')
+        basename, ext = os.path.splitext(img_name)
+        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
         if args.has_aligned:
             # the input faces are already cropped and aligned
             img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
             face_helper.cropped_faces = [img]
         else:
             face_helper.read_image(img)
                 restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
             restored_face = restored_face.astype('uint8')
+            face_helper.add_restored_face(restored_face)
         # paste_back
         if not args.has_aligned:
                 save_face_name = f'{basename}.png'
             else:
                 save_face_name = f'{basename}_{idx:02d}.png'
             save_restore_path = os.path.join(result_root, 'restored_faces', save_face_name)
             imwrite(restored_face, save_restore_path)
         # save restored img
         if not args.has_aligned and restored_img is not None:
             save_restore_path = os.path.join(result_root, 'final_results', f'{basename}.png')
             imwrite(restored_img, save_restore_path)
+    print(f'\nAll results are saved in {result_root}')

README.md CHANGED Viewed

@@ -4,9 +4,9 @@ emoji: 🐼
 colorFrom: blue
 colorTo: green
 sdk: gradio
-sdk_version: 6.1.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorFrom: blue
 colorTo: green
 sdk: gradio
+sdk_version: 3.37.0
 app_file: app.py
 pinned: false
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -2,15 +2,13 @@
 This file is used for deploying hugging face demo:
 https://huggingface.co/spaces/sczhou/CodeFormer
 """
-import spaces
 import sys
 sys.path.append('CodeFormer')
 import os
 import cv2
-import numpy as np
 import torch
 import torch.nn.functional as F
-import uuid, threading, time, glob
 import gradio as gr
 from torchvision.transforms.functional import normalize
@@ -18,9 +16,9 @@ from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
 from basicsr.archs.rrdbnet_arch import RRDBNet
 from basicsr.utils.realesrgan_utils import RealESRGANer
-from facelib.utils.misc import is_gray
 from basicsr.utils.registry import ARCH_REGISTRY
@@ -63,22 +61,10 @@ torch.hub.download_url_to_file(
     'https://raw.githubusercontent.com/sczhou/CodeFormer/master/inputs/cropped_faces/0729.png',
     '06.png')
-def imread_unicode_safe(path):
-    with open(path, "rb") as f:
-        data = np.frombuffer(f.read(), dtype=np.uint8)
-    return cv2.imdecode(data, cv2.IMREAD_COLOR)
-def delayed_remove(path, delay=60):
-    time.sleep(delay)
-    try:
-        if os.path.exists(path):
-            os.remove(path)
-            print(f"[CLEANUP] removed: {path}")
-        else:
-            print(f"[CLEANUP] already gone: {path}")
-    except Exception as e:
-        print(f"[CLEANUP] failed: {path} | {e}")
 # set enhancer with RealESRGAN
 def set_realesrgan():
@@ -118,7 +104,6 @@ codeformer_net.eval()
 os.makedirs('output', exist_ok=True)
-@spaces.GPU
 def inference(image, face_align, background_enhance, face_upsample, upscale, codeformer_fidelity):
     """Run a single prediction on the model"""
     try: # global try
@@ -127,6 +112,7 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
         draw_box = False
         detection_model = "retinaface_resnet50"
         face_align = face_align if face_align is not None else True
         background_enhance = background_enhance if background_enhance is not None else True
         face_upsample = face_upsample if face_upsample is not None else True
@@ -134,25 +120,8 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
         has_aligned = not face_align
         upscale = 1 if has_aligned else upscale
-        if isinstance(image, dict):
-            image_path = image.get("name")
-        elif isinstance(image, str):
-            image_path = image
-        else:
-            image_path = None
-            raise gr.Error("Invalid input image.")
-        if not os.path.exists(image_path):
-            raise gr.Error("Invalid input image.")
-        print('Inp:', image_path, background_enhance, face_upsample, upscale, codeformer_fidelity)
-        img = imread_unicode_safe(image_path)
-        if img is None:
-            raise gr.Error("Failed to read input image.")
         print('\timage size:', img.shape)
         upscale = int(upscale) # convert type to int
@@ -160,19 +129,11 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
             upscale = 4
         if upscale > 2 and max(img.shape[:2])>1000: # avoid memory exceeded due to too large img resolution
             upscale = 2
-        if min(img.shape[:2]) > 1100 or max(img.shape[:2])>1500: # avoid memory exceeded due to too large img resolution
             upscale = 1
             background_enhance = False
             face_upsample = False
-        h, w = img.shape[:2]
-        if h * w > 4_000_000: # avoid memory exceeded due to too large img resolution
-            raise gr.Error(
-                "Image resolution is too large (>4 megapixels). "
-                "To keep the demo responsive and avoid long queue times, this case is skipped. "
-                "For such inputs, please deploy this demo locally and remove this limit."
-            )
         face_helper = FaceRestoreHelper(
             upscale,
             face_size=512,
@@ -202,18 +163,12 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
             # align and warp each face
             face_helper.align_warp_face()
-            if min(img.shape[:2]) > 1000 and num_det_faces > 15:
-                raise gr.Error(
-                    "Too many faces detected (>15) in a high-resolution image. "
-                    "To keep the demo responsive and avoid long queue times, this case is skipped. "
-                    "For such inputs, please deploy this demo locally and remove this limit."
-                )
         # face restoration for each cropped face
         for idx, cropped_face in enumerate(face_helper.cropped_faces):
             # prepare data
-            cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
             normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
             cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
@@ -227,10 +182,12 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
                 torch.cuda.empty_cache()
             except RuntimeError as error:
                 print(f"Failed inference for CodeFormer: {error}")
-                restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
             restored_face = restored_face.astype("uint8")
-            face_helper.add_restored_face(restored_face, cropped_face)
         # paste_back
         if not has_aligned:
@@ -256,33 +213,15 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
             restored_img = restored_face
         # save restored img
-        # save_path = f'output/out.png'
-        # imwrite(restored_img, str(save_path))
-        # restored_img = cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB)
-        # return restored_img
-        #save restored img
-        save_path = f"output/{uuid.uuid4().hex}.png"
-        imwrite(restored_img, save_path)
-        print(f"[SAVE] path={save_path} outputs={len(glob.glob('output/*.png'))}")
-        threading.Thread(
-            target=delayed_remove,
-            args=(save_path,30),
-            daemon=True
-        ).start()
-        return save_path, None
-    except gr.Error:
-        raise
     except Exception as error:
-        print('[UNEXPECTED ERROR]', error)
-        raise gr.Error("Unexpected error. Please try another image.")
 title = "CodeFormer: Robust Face Restoration and Enhancement Network"
@@ -325,12 +264,6 @@ If you have any questions, please feel free to reach me out at <b>shangchenzhou@
 td {
     padding-right: 0px !important;
 }
-.gradio-container-4-37-2 .prose table, .gradio-container-4-37-2 .prose tr, .gradio-container-4-37-2 .prose td, .gradio-container-4-37-2 .prose th {
-    border: 0px solid #ffffff;
-    border-bottom: 0px solid #ffffff;
-}
 </style>
 <table>
@@ -343,39 +276,29 @@ td {
 <center><img src='https://api.infinitescript.com/badgen/count?name=sczhou/CodeFormer&ltext=Visitors&color=6dc9aa' alt='visitors'></center>
 """
-with gr.Blocks() as demo:
-    gr.Markdown(title)
-    gr.Markdown(description)
-    with gr.Row():
-        with gr.Column():
-            input_img = gr.Image(type="filepath", label="Input")
-            face_align = gr.Checkbox(value=True, label="Pre_Face_Align")
-            background_enhance = gr.Checkbox(value=True, label="Background_Enhance")
-            face_enhance = gr.Checkbox(value=True, label="Face_Upsample")
-            upscale_factor = gr.Number(value=2, label="Rescaling_Factor (up to 4)")
-            codeformer_fidelity = gr.Slider(0, 1, value=0.5, step=0.01, label='Codeformer_Fidelity (0 for better quality, 1 for better identity)')
-            submit = gr.Button('Enhance Image')
-        with gr.Column():
-            output_img = gr.Image(type="filepath", label="Output")
-            note = gr.Markdown("**Please download the output within 30 seconds.**")
-    inps = [input_img, face_align, background_enhance, face_enhance, upscale_factor, codeformer_fidelity]
-    outs = [output_img, note]
-    submit.click(fn=inference, inputs=inps, outputs=outs)
-    ex = gr.Examples([
         ['01.png', True, True, True, 2, 0.7],
         ['02.jpg', True, True, True, 2, 0.7],
         ['03.jpg', True, True, True, 2, 0.7],
         ['04.jpg', True, True, True, 2, 0.1],
         ['05.jpg', True, True, True, 2, 0.1],
         ['06.png', False, True, True, 1, 0.5]
-      ],
-        inputs=inps,
-        cache_examples=False)
-    gr.Markdown(article)
 DEBUG = os.getenv('DEBUG') == '1'
-demo.queue(api_open=False, max_size=10, default_concurrency_limit=2)
 demo.launch(debug=DEBUG)

 This file is used for deploying hugging face demo:
 https://huggingface.co/spaces/sczhou/CodeFormer
 """
 import sys
 sys.path.append('CodeFormer')
 import os
 import cv2
 import torch
 import torch.nn.functional as F
 import gradio as gr
 from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
+from facelib.utils.misc import is_gray
 from basicsr.archs.rrdbnet_arch import RRDBNet
 from basicsr.utils.realesrgan_utils import RealESRGANer
 from basicsr.utils.registry import ARCH_REGISTRY
     'https://raw.githubusercontent.com/sczhou/CodeFormer/master/inputs/cropped_faces/0729.png',
     '06.png')
+def imread(img_path):
+    """Read an image file with OpenCV and convert it from BGR to RGB.
+
+    Args:
+        img_path: path of the image file to load.
+
+    Returns:
+        The decoded image after a BGR -> RGB channel conversion.
+
+    Raises:
+        ValueError: if OpenCV cannot read or decode the file.
+    """
+    img = cv2.imread(img_path)
+    # cv2.imread reports failure by returning None instead of raising, which
+    # would otherwise surface as an opaque assertion inside cvtColor.
+    if img is None:
+        raise ValueError(f'Failed to read image: {img_path}')
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    return img
 # set enhancer with RealESRGAN
 def set_realesrgan():
 os.makedirs('output', exist_ok=True)
 def inference(image, face_align, background_enhance, face_upsample, upscale, codeformer_fidelity):
     """Run a single prediction on the model"""
     try: # global try
         draw_box = False
         detection_model = "retinaface_resnet50"
+        print('Inp:', image, background_enhance, face_upsample, upscale, codeformer_fidelity)
         face_align = face_align if face_align is not None else True
         background_enhance = background_enhance if background_enhance is not None else True
         face_upsample = face_upsample if face_upsample is not None else True
         has_aligned = not face_align
         upscale = 1 if has_aligned else upscale
+        img = cv2.imread(str(image), cv2.IMREAD_COLOR)
         print('\timage size:', img.shape)
         upscale = int(upscale) # convert type to int
             upscale = 4
         if upscale > 2 and max(img.shape[:2])>1000: # avoid memory exceeded due to too large img resolution
             upscale = 2
+        if max(img.shape[:2]) > 1500: # avoid memory exceeded due to too large img resolution
             upscale = 1
             background_enhance = False
             face_upsample = False
         face_helper = FaceRestoreHelper(
             upscale,
             face_size=512,
             # align and warp each face
             face_helper.align_warp_face()
         # face restoration for each cropped face
         for idx, cropped_face in enumerate(face_helper.cropped_faces):
             # prepare data
+            cropped_face_t = img2tensor(
+                cropped_face / 255.0, bgr2rgb=True, float32=True
+            )
             normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
             cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
                 torch.cuda.empty_cache()
             except RuntimeError as error:
                 print(f"Failed inference for CodeFormer: {error}")
+                restored_face = tensor2img(
+                    cropped_face_t, rgb2bgr=True, min_max=(-1, 1)
+                )
             restored_face = restored_face.astype("uint8")
+            face_helper.add_restored_face(restored_face)
         # paste_back
         if not has_aligned:
             restored_img = restored_face
         # save restored img
+        save_path = f'output/out.png'
+        imwrite(restored_img, str(save_path))
+        restored_img = cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB)
+        return restored_img
     except Exception as error:
+        print('Global exception', error)
+        return None, None
 title = "CodeFormer: Robust Face Restoration and Enhancement Network"
 td {
     padding-right: 0px !important;
 }
 </style>
 <table>
 <center><img src='https://api.infinitescript.com/badgen/count?name=sczhou/CodeFormer&ltext=Visitors&color=6dc9aa' alt='visitors'></center>
 """
+# Build the demo UI. The six input components are listed in the same order as
+# the parameters of inference(image, face_align, background_enhance,
+# face_upsample, upscale, codeformer_fidelity); each row in `examples`
+# follows that same order.
+# NOTE(review): a single output component is declared and the success path of
+# inference() returns one image, but its `except` branch returns the 2-tuple
+# (None, None) — confirm the error path matches the single output.
+demo = gr.Interface(
+    inference, [
+        gr.Image(type="filepath", label="Input"),
+        gr.Checkbox(value=True, label="Pre_Face_Align"),
+        gr.Checkbox(value=True, label="Background_Enhance"),
+        gr.Checkbox(value=True, label="Face_Upsample"),
+        gr.Number(value=2, label="Rescaling_Factor (up to 4)"),
+        gr.Slider(0, 1, value=0.5, step=0.01, label='Codeformer_Fidelity (0 for better quality, 1 for better identity)')
+    ], [
+        gr.Image(type="numpy", label="Output").style(height='auto')
+    ],
+    title=title,
+    description=description,
+    article=article,
+    examples=[
         ['01.png', True, True, True, 2, 0.7],
         ['02.jpg', True, True, True, 2, 0.7],
         ['03.jpg', True, True, True, 2, 0.7],
         ['04.jpg', True, True, True, 2, 0.1],
         ['05.jpg', True, True, True, 2, 0.1],
         ['06.png', False, True, True, 1, 0.5]
+      ])
+# Debug mode is enabled only when the DEBUG environment variable is exactly '1'.
 DEBUG = os.getenv('DEBUG') == '1'
+# Enable request queuing before launching; the queue is configured with
+# api_open=False, concurrency_count=2 and max_size=10.
+demo.queue(api_open=False, concurrency_count=2, max_size=10)
 demo.launch(debug=DEBUG)