08f4ccb5459ba8d71ac7e1cb832d4f5e480338e9d9664a469baa1b1252917877
Browse files- Rope.bat +2 -0
- Rope.py +5 -0
- rope/Models.py +1961 -0
- rope/Styles.py +293 -0
- rope/VideoManager.py +1242 -0
- rope/media/tl_beg_off.png +0 -0
- rope/media/tl_beg_on.png +0 -0
- rope/media/tl_left_hover.png +0 -0
- rope/media/tl_left_off.png +0 -0
- rope/media/tl_left_on.png +0 -0
- rope/media/tl_right_hover.png +0 -0
- rope/media/tl_right_off.png +0 -0
- rope/media/tl_right_on.png +0 -0
Rope.bat
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
rem Launch Rope inside its dedicated conda environment.
call conda activate Rope && python Rope.py
rem Keep the console window open so startup errors remain visible.
pause
|
Rope.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# Application entry point: all setup and the UI loop live in rope.Coordinator.

from rope import Coordinator

if __name__ == "__main__":
    Coordinator.run()
|
rope/Models.py
ADDED
|
@@ -0,0 +1,1961 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
from skimage import transform as trans
|
| 4 |
+
import torch
|
| 5 |
+
import torchvision
|
| 6 |
+
torchvision.disable_beta_transforms_warning()
|
| 7 |
+
from torchvision.transforms import v2
|
| 8 |
+
from numpy.linalg import norm as l2norm
|
| 9 |
+
import onnxruntime
|
| 10 |
+
import onnx
|
| 11 |
+
from itertools import product as product
|
| 12 |
+
import subprocess as sp
|
| 13 |
+
onnxruntime.set_default_logger_severity(4)
|
| 14 |
+
onnxruntime.log_verbosity_level = -1
|
| 15 |
+
import rope.FaceUtil as faceutil
|
| 16 |
+
import pickle
|
| 17 |
+
|
| 18 |
+
class Models():
    def __init__(self):
        # Canonical 5-point face template (eyes, nose tip, mouth corners) used
        # for ArcFace-style alignment, in 112x112 crop coordinates.
        self.arcface_dst = np.array( [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
        # Preferred onnxruntime execution providers, tried in order.
        self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        # All model slots start as [] (falsy) and are lazily replaced by
        # onnxruntime.InferenceSession objects on first use.
        self.retinaface_model = []
        self.yoloface_model = []
        self.scrdf_model = []
        self.yunet_model = []
        self.face_landmark_68_model = []
        self.face_landmark_3d68_model = []
        self.mean_lmk = []
        self.face_landmark_98_model = []
        self.face_landmark_106_model = []
        self.face_landmark_478_model = []
        self.face_blendshapes_model = []
        # anchors: flat prior-box list built on first '5'-landmark detection.
        self.resnet50_model, self.anchors = [], []

        self.insight106_model = []

        self.recognition_model = []
        self.swapper_model = []
        self.swapper_model_kps = []
        self.swapper_model_swap = []

        # emap: inswapper embedding-projection matrix, loaded from the onnx graph.
        self.emap = []
        self.GFPGAN_model = []
        self.GPEN_256_model = []
        self.GPEN_512_model = []
        self.GPEN_1024_model = []
        self.codeformer_model = []

        self.occluder_model = []
        self.faceparser_model = []

        # 1x1 CUDA tensor; .cpu() on it is used elsewhere as a device-sync
        # point before onnxruntime reads GPU buffers (NOTE(review): confirm).
        self.syncvec = torch.empty((1,1), dtype=torch.float32, device='cuda:0')

        # Identity normalization (mean 0, std 1) kept for interface symmetry.
        self.normalize = v2.Normalize(mean = [ 0., 0., 0. ],
                                      std = [ 1/1.0, 1/1.0, 1/1.0 ])

        # Landmark indices fed to the blendshapes model — presumably the
        # MediaPipe 146-point subset of the 478 mesh (model is Nx146x2); verify.
        self.LandmarksSubsetIdxs = [
            0, 1, 4, 5, 6, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39,
            40, 46, 52, 53, 54, 55, 58, 61, 63, 65, 66, 67, 70, 78, 80,
            81, 82, 84, 87, 88, 91, 93, 95, 103, 105, 107, 109, 127, 132, 133,
            136, 144, 145, 146, 148, 149, 150, 152, 153, 154, 155, 157, 158, 159, 160,
            161, 162, 163, 168, 172, 173, 176, 178, 181, 185, 191, 195, 197, 234, 246,
            249, 251, 263, 267, 269, 270, 276, 282, 283, 284, 285, 288, 291, 293, 295,
            296, 297, 300, 308, 310, 311, 312, 314, 317, 318, 321, 323, 324, 332, 334,
            336, 338, 356, 361, 362, 365, 373, 374, 375, 377, 378, 379, 380, 381, 382,
            384, 385, 386, 387, 388, 389, 390, 397, 398, 400, 402, 405, 409, 415, 454,
            466, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477
        ]
|
| 70 |
+
|
| 71 |
+
def get_gpu_memory(self):
|
| 72 |
+
command = "nvidia-smi --query-gpu=memory.total --format=csv"
|
| 73 |
+
memory_total_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
|
| 74 |
+
memory_total = [int(x.split()[0]) for i, x in enumerate(memory_total_info)]
|
| 75 |
+
|
| 76 |
+
command = "nvidia-smi --query-gpu=memory.free --format=csv"
|
| 77 |
+
memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
|
| 78 |
+
memory_free = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]
|
| 79 |
+
|
| 80 |
+
memory_used = memory_total[0] - memory_free[0]
|
| 81 |
+
|
| 82 |
+
return memory_used, memory_total[0]
|
| 83 |
+
|
| 84 |
+
def run_detect(self, img, detect_mode='Retinaface', max_num=1, score=0.5, use_landmark_detection=False, landmark_detect_mode='98', landmark_score=0.5, from_points=False):
|
| 85 |
+
bboxes = []
|
| 86 |
+
kpss = []
|
| 87 |
+
|
| 88 |
+
if detect_mode=='Retinaface':
|
| 89 |
+
if not self.retinaface_model:
|
| 90 |
+
self.retinaface_model = onnxruntime.InferenceSession('./models/det_10g.onnx', providers=self.providers)
|
| 91 |
+
|
| 92 |
+
bboxes, kpss = self.detect_retinaface(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)
|
| 93 |
+
|
| 94 |
+
elif detect_mode=='SCRDF':
|
| 95 |
+
if not self.scrdf_model:
|
| 96 |
+
self.scrdf_model = onnxruntime.InferenceSession('./models/scrfd_2.5g_bnkps.onnx', providers=self.providers)
|
| 97 |
+
|
| 98 |
+
bboxes, kpss = self.detect_scrdf(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)
|
| 99 |
+
|
| 100 |
+
elif detect_mode=='Yolov8':
|
| 101 |
+
if not self.yoloface_model:
|
| 102 |
+
self.yoloface_model = onnxruntime.InferenceSession('./models/yoloface_8n.onnx', providers=self.providers)
|
| 103 |
+
#self.insight106_model = onnxruntime.InferenceSession('./models/2d106det.onnx', providers=self.providers)
|
| 104 |
+
|
| 105 |
+
bboxes, kpss = self.detect_yoloface(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)
|
| 106 |
+
|
| 107 |
+
elif detect_mode=='Yunet':
|
| 108 |
+
if not self.yunet_model:
|
| 109 |
+
self.yunet_model = onnxruntime.InferenceSession('./models/yunet_n_640_640.onnx', providers=self.providers)
|
| 110 |
+
|
| 111 |
+
bboxes, kpss = self.detect_yunet(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)
|
| 112 |
+
|
| 113 |
+
return bboxes, kpss
|
| 114 |
+
|
| 115 |
+
    def run_detect_landmark(self, img, bbox, det_kpss, detect_mode='98', score=0.5, from_points=False):
        """Detect face landmarks with the model chosen by detect_mode
        ('5', '68', '3d68', '98', '106' or '478'); models load lazily.

        Returns (kpss, scores), or ([], []) when the result fails the
        mean-score filter at the bottom.
        """
        kpss = []
        scores = []

        if detect_mode=='5':
            if not self.resnet50_model:
                self.resnet50_model = onnxruntime.InferenceSession("./models/res50.onnx", providers=self.providers)

                # Build the RetinaFace-style prior anchors once, on first load,
                # as a flat list of (cx, cy, w, h) normalized to image_size.
                feature_maps = [[64, 64], [32, 32], [16, 16]]
                min_sizes = [[16, 32], [64, 128], [256, 512]]
                steps = [8, 16, 32]
                image_size = 512

                for k, f in enumerate(feature_maps):
                    min_size_array = min_sizes[k]
                    for i, j in product(range(f[0]), range(f[1])):
                        for min_size in min_size_array:
                            s_kx = min_size / image_size
                            s_ky = min_size / image_size
                            dense_cx = [x * steps[k] / image_size for x in [j + 0.5]]
                            dense_cy = [y * steps[k] / image_size for y in [i + 0.5]]
                            for cy, cx in product(dense_cy, dense_cx):
                                self.anchors += [cx, cy, s_kx, s_ky]

            kpss, scores = self.detect_face_landmark_5(img, bbox=bbox, det_kpss=det_kpss, from_points=from_points)

        elif detect_mode=='68':
            if not self.face_landmark_68_model:
                self.face_landmark_68_model = onnxruntime.InferenceSession('./models/2dfan4.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_68(img, bbox=bbox, det_kpss=det_kpss, convert68_5=True, from_points=from_points)

        elif detect_mode=='3d68':
            if not self.face_landmark_3d68_model:
                self.face_landmark_3d68_model = onnxruntime.InferenceSession('./models/1k3d68.onnx', providers=self.providers)
                # mean_lmk: mean 68-point shape used by the 3d68 landmark model.
                with open('./models/meanshape_68.pkl', 'rb') as f:
                    self.mean_lmk = pickle.load(f)

            kpss, scores = self.detect_face_landmark_3d68(img, bbox=bbox, det_kpss=det_kpss, convert68_5=True, from_points=from_points)

            # NOTE(review): this early return bypasses the mean-score filter
            # below, unlike the '5'/'68'/'98' branches — confirm intentional.
            return kpss, scores

        elif detect_mode=='98':
            if not self.face_landmark_98_model:
                self.face_landmark_98_model = onnxruntime.InferenceSession('./models/peppapig_teacher_Nx3x256x256.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_98(img, bbox=bbox, det_kpss=det_kpss, convert98_5=True, from_points=from_points)

        elif detect_mode=='106':
            if not self.face_landmark_106_model:
                self.face_landmark_106_model = onnxruntime.InferenceSession('./models/2d106det.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_106(img, bbox=bbox, det_kpss=det_kpss, convert106_5=True, from_points=from_points)

            # NOTE(review): early return, skips the score filter below — confirm.
            return kpss, scores

        elif detect_mode=='478':
            if not self.face_landmark_478_model:
                self.face_landmark_478_model = onnxruntime.InferenceSession('./models/face_landmarks_detector_Nx3x256x256.onnx', providers=self.providers)

            if not self.face_blendshapes_model:
                self.face_blendshapes_model = onnxruntime.InferenceSession('./models/face_blendshapes_Nx146x2.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_478(img, bbox=bbox, det_kpss=det_kpss, convert478_5=True, from_points=from_points)

            # NOTE(review): early return, skips the score filter below — confirm.
            return kpss, scores

        # Score filter ('5'/'68'/'98' and unknown modes only): accept when the
        # mean landmark confidence clears `score`, or when no scores exist.
        if len(kpss) > 0:
            if len(scores) > 0:
                if np.mean(scores) >= score:
                    return kpss, scores
            else:
                return kpss, scores

        return [], []
|
| 190 |
+
|
| 191 |
+
def delete_models(self):
|
| 192 |
+
self.retinaface_model = []
|
| 193 |
+
self.yoloface_model = []
|
| 194 |
+
self.scrdf_model = []
|
| 195 |
+
self.yunet_model = []
|
| 196 |
+
self.face_landmark_68_model = []
|
| 197 |
+
self.face_landmark_3d68_model = []
|
| 198 |
+
self.mean_lmk = []
|
| 199 |
+
self.face_landmark_98_model = []
|
| 200 |
+
self.face_landmark_106_model = []
|
| 201 |
+
self.face_landmark_478_model = []
|
| 202 |
+
self.face_blendshapes_model = []
|
| 203 |
+
self.resnet50_model = []
|
| 204 |
+
self.insight106_model = []
|
| 205 |
+
self.recognition_model = []
|
| 206 |
+
self.swapper_model = []
|
| 207 |
+
self.GFPGAN_model = []
|
| 208 |
+
self.GPEN_256_model = []
|
| 209 |
+
self.GPEN_512_model = []
|
| 210 |
+
self.GPEN_1024_model = []
|
| 211 |
+
self.codeformer_model = []
|
| 212 |
+
self.occluder_model = []
|
| 213 |
+
self.faceparser_model = []
|
| 214 |
+
|
| 215 |
+
def run_recognize(self, img, kps):
|
| 216 |
+
if not self.recognition_model:
|
| 217 |
+
self.recognition_model = onnxruntime.InferenceSession('./models/w600k_r50.onnx', providers=self.providers)
|
| 218 |
+
|
| 219 |
+
embedding, cropped_image = self.recognize(img, kps)
|
| 220 |
+
return embedding, cropped_image
|
| 221 |
+
|
| 222 |
+
def calc_swapper_latent(self, source_embedding):
|
| 223 |
+
if not self.swapper_model:
|
| 224 |
+
graph = onnx.load("./models/inswapper_128.fp16.onnx").graph
|
| 225 |
+
self.emap = onnx.numpy_helper.to_array(graph.initializer[-1])
|
| 226 |
+
|
| 227 |
+
n_e = source_embedding / l2norm(source_embedding)
|
| 228 |
+
latent = n_e.reshape((1,-1))
|
| 229 |
+
latent = np.dot(latent, self.emap)
|
| 230 |
+
latent /= np.linalg.norm(latent)
|
| 231 |
+
return latent
|
| 232 |
+
|
| 233 |
+
def run_swapper(self, image, embedding, output):
|
| 234 |
+
if not self.swapper_model:
|
| 235 |
+
cuda_options = {"arena_extend_strategy": "kSameAsRequested", 'cudnn_conv_algo_search': 'DEFAULT'}
|
| 236 |
+
sess_options = onnxruntime.SessionOptions()
|
| 237 |
+
sess_options.enable_cpu_mem_arena = False
|
| 238 |
+
|
| 239 |
+
# self.swapper_model = onnxruntime.InferenceSession( "./models/inswapper_128_last_cubic.onnx", sess_options, providers=[('CUDAExecutionProvider', cuda_options), 'CPUExecutionProvider'])
|
| 240 |
+
|
| 241 |
+
self.swapper_model = onnxruntime.InferenceSession( "./models/inswapper_128.fp16.onnx", providers=self.providers)
|
| 242 |
+
|
| 243 |
+
io_binding = self.swapper_model.io_binding()
|
| 244 |
+
io_binding.bind_input(name='target', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,128,128), buffer_ptr=image.data_ptr())
|
| 245 |
+
io_binding.bind_input(name='source', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,512), buffer_ptr=embedding.data_ptr())
|
| 246 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,128,128), buffer_ptr=output.data_ptr())
|
| 247 |
+
|
| 248 |
+
self.syncvec.cpu()
|
| 249 |
+
self.swapper_model.run_with_iobinding(io_binding)
|
| 250 |
+
|
| 251 |
+
def run_swap_stg1(self, embedding):
|
| 252 |
+
|
| 253 |
+
# Load model
|
| 254 |
+
if not self.swapper_model_kps:
|
| 255 |
+
self.swapper_model_kps = onnxruntime.InferenceSession( "./models/inswapper_kps.onnx", providers=self.providers)
|
| 256 |
+
|
| 257 |
+
# Wacky data structure
|
| 258 |
+
io_binding = self.swapper_model_kps.io_binding()
|
| 259 |
+
kps_1 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 260 |
+
kps_2 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 261 |
+
kps_3 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 262 |
+
kps_4 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 263 |
+
kps_5 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 264 |
+
kps_6 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 265 |
+
kps_7 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 266 |
+
kps_8 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 267 |
+
kps_9 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 268 |
+
kps_10 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 269 |
+
kps_11 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 270 |
+
kps_12 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
|
| 271 |
+
|
| 272 |
+
# Bind the data structures
|
| 273 |
+
io_binding.bind_input(name='source', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 512), buffer_ptr=embedding.data_ptr())
|
| 274 |
+
io_binding.bind_output(name='1', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_1.data_ptr())
|
| 275 |
+
io_binding.bind_output(name='2', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_2.data_ptr())
|
| 276 |
+
io_binding.bind_output(name='3', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_3.data_ptr())
|
| 277 |
+
io_binding.bind_output(name='4', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_4.data_ptr())
|
| 278 |
+
io_binding.bind_output(name='5', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_5.data_ptr())
|
| 279 |
+
io_binding.bind_output(name='6', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_6.data_ptr())
|
| 280 |
+
io_binding.bind_output(name='7', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_7.data_ptr())
|
| 281 |
+
io_binding.bind_output(name='8', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_8.data_ptr())
|
| 282 |
+
io_binding.bind_output(name='9', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_9.data_ptr())
|
| 283 |
+
io_binding.bind_output(name='10', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_10.data_ptr())
|
| 284 |
+
io_binding.bind_output(name='11', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_11.data_ptr())
|
| 285 |
+
io_binding.bind_output(name='12', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_12.data_ptr())
|
| 286 |
+
|
| 287 |
+
self.syncvec.cpu()
|
| 288 |
+
self.swapper_model_kps.run_with_iobinding(io_binding)
|
| 289 |
+
|
| 290 |
+
# List of pointers
|
| 291 |
+
holder = []
|
| 292 |
+
holder.append(kps_1)
|
| 293 |
+
holder.append(kps_2)
|
| 294 |
+
holder.append(kps_3)
|
| 295 |
+
holder.append(kps_4)
|
| 296 |
+
holder.append(kps_5)
|
| 297 |
+
holder.append(kps_6)
|
| 298 |
+
holder.append(kps_7)
|
| 299 |
+
holder.append(kps_8)
|
| 300 |
+
holder.append(kps_9)
|
| 301 |
+
holder.append(kps_10)
|
| 302 |
+
holder.append(kps_11)
|
| 303 |
+
holder.append(kps_12)
|
| 304 |
+
|
| 305 |
+
return holder
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def run_swap_stg2(self, image, holder, output):
|
| 309 |
+
if not self.swapper_model_swap:
|
| 310 |
+
self.swapper_model_swap = onnxruntime.InferenceSession( "./models/inswapper_swap.onnx", providers=self.providers)
|
| 311 |
+
|
| 312 |
+
io_binding = self.swapper_model_swap.io_binding()
|
| 313 |
+
io_binding.bind_input(name='target', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 3, 128, 128), buffer_ptr=image.data_ptr())
|
| 314 |
+
io_binding.bind_input(name='onnx::Unsqueeze_170', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[0].data_ptr())
|
| 315 |
+
io_binding.bind_input(name='onnx::Unsqueeze_224', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[1].data_ptr())
|
| 316 |
+
io_binding.bind_input(name='onnx::Unsqueeze_278', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[2].data_ptr())
|
| 317 |
+
io_binding.bind_input(name='onnx::Unsqueeze_332', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[3].data_ptr())
|
| 318 |
+
io_binding.bind_input(name='onnx::Unsqueeze_386', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[4].data_ptr())
|
| 319 |
+
io_binding.bind_input(name='onnx::Unsqueeze_440', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[5].data_ptr())
|
| 320 |
+
io_binding.bind_input(name='onnx::Unsqueeze_494', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[6].data_ptr())
|
| 321 |
+
io_binding.bind_input(name='onnx::Unsqueeze_548', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[7].data_ptr())
|
| 322 |
+
io_binding.bind_input(name='onnx::Unsqueeze_602', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[8].data_ptr())
|
| 323 |
+
io_binding.bind_input(name='onnx::Unsqueeze_656', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[9].data_ptr())
|
| 324 |
+
io_binding.bind_input(name='onnx::Unsqueeze_710', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[10].data_ptr())
|
| 325 |
+
io_binding.bind_input(name='onnx::Unsqueeze_764', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[11].data_ptr())
|
| 326 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 3, 128, 128), buffer_ptr=output.data_ptr())
|
| 327 |
+
|
| 328 |
+
self.syncvec.cpu()
|
| 329 |
+
self.swapper_model_swap.run_with_iobinding(io_binding)
|
| 330 |
+
def run_GFPGAN(self, image, output):
    """Restore a face crop with GFPGAN v1.4.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 512, 512) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 3, 512, 512); the
        restored face is written into it in place via IO binding.
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.GFPGAN_model:
        self.GFPGAN_model = onnxruntime.InferenceSession(
            "./models/GFPGANv1.4.onnx", providers=self.providers)

    # Bind GPU buffers directly so no host<->device copies are needed.
    io_binding = self.GFPGAN_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 512, 512),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='output', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 3, 512, 512),
                           buffer_ptr=output.data_ptr())

    # Moving the tiny sync tensor to CPU flushes pending CUDA work before
    # ONNX Runtime touches the bound buffers.
    self.syncvec.cpu()
    self.GFPGAN_model.run_with_iobinding(io_binding)
def run_GPEN_1024(self, image, output):
    """Restore a face crop with GPEN-BFR at 1024x1024 resolution.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 1024, 1024) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 3, 1024, 1024) that
        receives the restored face (written in place via IO binding).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.GPEN_1024_model:
        self.GPEN_1024_model = onnxruntime.InferenceSession(
            "./models/GPEN-BFR-1024.onnx", providers=self.providers)

    io_binding = self.GPEN_1024_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 1024, 1024),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='output', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 3, 1024, 1024),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.GPEN_1024_model.run_with_iobinding(io_binding)
def run_GPEN_512(self, image, output):
    """Restore a face crop with GPEN-BFR at 512x512 resolution.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 512, 512) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 3, 512, 512) that
        receives the restored face (written in place via IO binding).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.GPEN_512_model:
        self.GPEN_512_model = onnxruntime.InferenceSession(
            "./models/GPEN-BFR-512.onnx", providers=self.providers)

    io_binding = self.GPEN_512_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 512, 512),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='output', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 3, 512, 512),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.GPEN_512_model.run_with_iobinding(io_binding)
def run_GPEN_256(self, image, output):
    """Restore a face crop with GPEN-BFR at 256x256 resolution.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 256, 256) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 3, 256, 256) that
        receives the restored face (written in place via IO binding).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.GPEN_256_model:
        self.GPEN_256_model = onnxruntime.InferenceSession(
            "./models/GPEN-BFR-256.onnx", providers=self.providers)

    io_binding = self.GPEN_256_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 256, 256),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='output', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 3, 256, 256),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.GPEN_256_model.run_with_iobinding(io_binding)
def run_codeformer(self, image, output):
    """Restore a face crop with CodeFormer (fp16 ONNX export).

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 512, 512) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 3, 512, 512) that
        receives the restored face (written in place via IO binding).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.codeformer_model:
        self.codeformer_model = onnxruntime.InferenceSession(
            "./models/codeformer_fp16.onnx", providers=self.providers)

    io_binding = self.codeformer_model.io_binding()
    io_binding.bind_input(name='x', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 512, 512),
                          buffer_ptr=image.data_ptr())
    # 'w' is CodeFormer's scalar fidelity weight; it is a CPU-side input
    # (no GPU buffer) and is fixed at 0.9 here.
    w = np.array([0.9], dtype=np.double)
    io_binding.bind_cpu_input('w', w)
    io_binding.bind_output(name='y', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 3, 512, 512),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.codeformer_model.run_with_iobinding(io_binding)
def run_occluder(self, image, output):
    """Run the occlusion-mask model on a face crop.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 256, 256) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 1, 256, 256) that
        receives the single-channel occlusion mask (written in place).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.occluder_model:
        self.occluder_model = onnxruntime.InferenceSession(
            "./models/occluder.onnx", providers=self.providers)

    io_binding = self.occluder_model.io_binding()
    io_binding.bind_input(name='img', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 256, 256),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='output', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 1, 256, 256),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.occluder_model.run_with_iobinding(io_binding)
def run_faceparser(self, image, output):
    """Run the face-parsing (semantic segmentation) model on a face crop.

    Parameters
    ----------
    image : torch.Tensor
        CUDA float32 tensor of shape (1, 3, 512, 512) holding the input face.
    output : torch.Tensor
        Pre-allocated CUDA float32 tensor of shape (1, 19, 512, 512) that
        receives the 19-class segmentation logits (written in place).
    """
    # Lazily create the ONNX Runtime session on first use.
    if not self.faceparser_model:
        self.faceparser_model = onnxruntime.InferenceSession(
            "./models/faceparser_fp16.onnx", providers=self.providers)

    io_binding = self.faceparser_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=(1, 3, 512, 512),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='out', device_type='cuda', device_id=0,
                           element_type=np.float32, shape=(1, 19, 512, 512),
                           buffer_ptr=output.data_ptr())

    # Flush pending CUDA work before running on the bound buffers.
    self.syncvec.cpu()
    self.faceparser_model.run_with_iobinding(io_binding)
def detect_retinaface(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
    """Detect faces with the RetinaFace ONNX model.

    Parameters
    ----------
    img : torch.Tensor
        CUDA uint8/float CHW image tensor (3, H, W).
    max_num : int
        Maximum number of faces to keep (0 = unlimited); when trimming,
        larger and more centered faces are preferred.
    score : float
        Minimum detection confidence.
    use_landmark_detection : bool
        When True, refine keypoints with the secondary landmark detector.
    landmark_detect_mode, landmark_score, from_points
        Passed through to ``run_detect_landmark``.

    Returns
    -------
    (det, kpss) : tuple of np.ndarray
        det  : (N, 4) face bounding boxes in original-image coordinates.
        kpss : (N, 5, 2) five-point facial landmarks.
    """
    if use_landmark_detection:
        # Keep an untouched copy; img is resized/normalized below.
        img_landmark = img.clone()

    # --- Preprocess: letterbox into a 640x640 detector input -------------
    input_size = (640, 640)
    im_ratio = torch.div(img.size()[1], img.size()[2])

    model_ratio = 1.0
    if im_ratio > model_ratio:
        new_height = input_size[1]
        new_width = int(new_height / im_ratio)
    else:
        new_width = input_size[0]
        new_height = int(new_width * im_ratio)
    # Factor mapping detector coordinates back to the original image.
    det_scale = torch.div(new_height, img.size()[1])

    resize = v2.Resize((new_height, new_width), antialias=True)
    img = resize(img)
    img = img.permute(1, 2, 0)

    det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
    det_img[:new_height, :new_width, :] = img

    # Switch to BGR and normalize to roughly [-1, 1].
    det_img = det_img[:, :, [2, 1, 0]]
    det_img = torch.sub(det_img, 127.5)
    det_img = torch.div(det_img, 128.0)
    det_img = det_img.permute(2, 0, 1)

    det_img = torch.unsqueeze(det_img, 0).contiguous()

    # --- Run the model via IO binding ------------------------------------
    io_binding = self.retinaface_model.io_binding()
    io_binding.bind_input(name='input.1', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=det_img.size(),
                          buffer_ptr=det_img.data_ptr())
    # 3 strides x (scores, bbox deltas, keypoint deltas) = 9 outputs.
    for output_name in ('448', '471', '494', '451', '474', '497', '454', '477', '500'):
        io_binding.bind_output(output_name, 'cuda')

    # Flush pending CUDA work, then run.
    self.syncvec.cpu()
    self.retinaface_model.run_with_iobinding(io_binding)

    net_outs = io_binding.copy_outputs_to_cpu()

    input_height = det_img.shape[2]
    input_width = det_img.shape[3]

    # --- Decode anchor-relative predictions per stride --------------------
    fmc = 3  # outputs 0..2 scores, 3..5 bboxes, 6..8 keypoints
    center_cache = {}
    scores_list = []
    bboxes_list = []
    kpss_list = []
    for idx, stride in enumerate([8, 16, 32]):
        scores = net_outs[idx]
        bbox_preds = net_outs[idx + fmc] * stride
        kps_preds = net_outs[idx + fmc * 2] * stride
        height = input_height // stride
        width = input_width // stride
        key = (height, width, stride)
        if key in center_cache:
            anchor_centers = center_cache[key]
        else:
            # Two anchors per grid cell, centered on the cell.
            anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
            anchor_centers = (anchor_centers * stride).reshape((-1, 2))
            anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
            if len(center_cache) < 100:
                center_cache[key] = anchor_centers

        pos_inds = np.where(scores >= score)[0]

        # Distance-from-center box encoding -> corner coordinates.
        x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
        y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
        x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
        y2 = anchor_centers[:, 1] + bbox_preds[:, 3]
        bboxes = np.stack([x1, y1, x2, y2], axis=-1)

        scores_list.append(scores[pos_inds])
        bboxes_list.append(bboxes[pos_inds])

        # Keypoints are (dx, dy) offsets from the anchor center.
        preds = []
        for i in range(0, kps_preds.shape[1], 2):
            px = anchor_centers[:, i % 2] + kps_preds[:, i]
            py = anchor_centers[:, i % 2 + 1] + kps_preds[:, i + 1]
            preds.append(px)
            preds.append(py)
        kpss = np.stack(preds, axis=-1)
        kpss = kpss.reshape((kpss.shape[0], -1, 2))
        kpss_list.append(kpss[pos_inds])

    # --- Sort by score, map back to original image scale ------------------
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]

    det_scale = det_scale.numpy()
    bboxes = np.vstack(bboxes_list) / det_scale
    kpss = np.vstack(kpss_list) / det_scale
    pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
    pre_det = pre_det[order, :]

    # --- Greedy IoU NMS ----------------------------------------------------
    dets = pre_det
    thresh = 0.4
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scoresb = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    orderb = scoresb.argsort()[::-1]

    keep = []
    while orderb.size > 0:
        i = orderb[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[orderb[1:]])
        yy1 = np.maximum(y1[i], y1[orderb[1:]])
        xx2 = np.minimum(x2[i], x2[orderb[1:]])
        yy2 = np.minimum(y2[i], y2[orderb[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h
        ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        orderb = orderb[inds + 1]

    det = pre_det[keep, :]
    kpss = kpss[order, :, :]
    kpss = kpss[keep, :, :]

    # --- Trim to max_num, preferring large + centered faces ----------------
    if max_num > 0 and det.shape[0] > max_num:
        area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

        values = area - offset_dist_squared * 2.0  # extra weight on centering
        bindex = np.argsort(values)[::-1]
        bindex = bindex[0:max_num]

        det = det[bindex, :]
        if kpss is not None:
            kpss = kpss[bindex, :]

    score_values = det[:, 4]
    # Drop the score column; callers only want coordinates.
    det = np.delete(det, 4, 1)

    # --- Optional landmark refinement --------------------------------------
    if use_landmark_detection and len(kpss) > 0:
        for i in range(kpss.shape[0]):
            landmark_kpss, landmark_scores = self.run_detect_landmark(
                img_landmark, det[i], kpss[i], landmark_detect_mode, landmark_score, from_points)
            if len(landmark_kpss) > 0:
                if len(landmark_scores) > 0:
                    # Only accept the refined landmarks when the landmark
                    # detector is more confident than the face detector.
                    if np.mean(landmark_scores) > np.mean(score_values[i]):
                        kpss[i] = landmark_kpss
                else:
                    kpss[i] = landmark_kpss

    return det, kpss
def detect_retinaface2(self, img, max_num, score):
    """Detect faces with RetinaFace and return per-person averaged keypoints.

    Unlike :meth:`detect_retinaface`, overlapping detections are grouped
    into "people" during NMS and the keypoints of each group are averaged.

    Parameters
    ----------
    img : torch.Tensor
        CUDA uint8/float CHW image tensor (3, H, W).
    max_num : int
        Maximum number of faces to keep (0 = unlimited).
    score : float
        Minimum detection confidence.

    Returns
    -------
    list
        One averaged (5, 2) keypoint list per detected person.
    """
    # --- Preprocess: letterbox into a 640x640 detector input -------------
    input_size = (640, 640)
    im_ratio = torch.div(img.size()[1], img.size()[2])

    model_ratio = 1.0
    if im_ratio > model_ratio:
        new_height = input_size[1]
        new_width = int(new_height / im_ratio)
    else:
        new_width = input_size[0]
        new_height = int(new_width * im_ratio)
    det_scale = torch.div(new_height, img.size()[1])

    resize = v2.Resize((new_height, new_width), antialias=True)
    img = resize(img)
    img = img.permute(1, 2, 0)

    det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
    det_img[:new_height, :new_width, :] = img

    # Switch to BGR and normalize to roughly [-1, 1].
    det_img = det_img[:, :, [2, 1, 0]]
    det_img = torch.sub(det_img, 127.5)
    det_img = torch.div(det_img, 128.0)
    det_img = det_img.permute(2, 0, 1)

    det_img = torch.unsqueeze(det_img, 0).contiguous()

    # --- Run the model via IO binding ------------------------------------
    io_binding = self.retinaface_model.io_binding()
    io_binding.bind_input(name='input.1', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=det_img.size(),
                          buffer_ptr=det_img.data_ptr())
    for output_name in ('448', '471', '494', '451', '474', '497', '454', '477', '500'):
        io_binding.bind_output(output_name, 'cuda')

    # Flush pending CUDA work, then run.
    self.syncvec.cpu()
    self.retinaface_model.run_with_iobinding(io_binding)

    net_outs = io_binding.copy_outputs_to_cpu()

    input_height = det_img.shape[2]
    input_width = det_img.shape[3]

    # --- Decode anchor-relative predictions per stride --------------------
    fmc = 3  # outputs 0..2 scores, 3..5 bboxes, 6..8 keypoints
    center_cache = {}
    scores_list = []
    bboxes_list = []
    kpss_list = []
    for idx, stride in enumerate([8, 16, 32]):
        scores = net_outs[idx]
        bbox_preds = net_outs[idx + fmc] * stride
        kps_preds = net_outs[idx + fmc * 2] * stride
        height = input_height // stride
        width = input_width // stride
        key = (height, width, stride)
        if key in center_cache:
            anchor_centers = center_cache[key]
        else:
            anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
            anchor_centers = (anchor_centers * stride).reshape((-1, 2))
            anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
            if len(center_cache) < 100:
                center_cache[key] = anchor_centers

        pos_inds = np.where(scores >= score)[0]

        x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
        y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
        x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
        y2 = anchor_centers[:, 1] + bbox_preds[:, 3]
        bboxes = np.stack([x1, y1, x2, y2], axis=-1)

        scores_list.append(scores[pos_inds])
        bboxes_list.append(bboxes[pos_inds])

        preds = []
        for i in range(0, kps_preds.shape[1], 2):
            px = anchor_centers[:, i % 2] + kps_preds[:, i]
            py = anchor_centers[:, i % 2 + 1] + kps_preds[:, i + 1]
            preds.append(px)
            preds.append(py)
        kpss = np.stack(preds, axis=-1)
        kpss = kpss.reshape((kpss.shape[0], -1, 2))
        kpss_list.append(kpss[pos_inds])

    # --- Sort by score, map back to original image scale ------------------
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]

    det_scale = det_scale.numpy()
    bboxes = np.vstack(bboxes_list) / det_scale
    kpss = np.vstack(kpss_list) / det_scale
    pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
    pre_det = pre_det[order, :]

    # --- NMS that also groups overlapping boxes into "people" -------------
    dets = pre_det
    thresh = 0.4
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scoresb = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    orderb = scoresb.argsort()[::-1]

    keep = []
    person_id = 0
    people = {}

    while orderb.size > 0:
        # Keep the highest-scoring remaining box and start a new group.
        i = orderb[0]
        keep.append(i)
        people[person_id] = orderb[0]

        # Overlap of the remaining boxes with the kept box.
        xx1 = np.maximum(x1[i], x1[orderb[1:]])
        yy1 = np.maximum(y1[i], y1[orderb[1:]])
        xx2 = np.minimum(x2[i], x2[orderb[1:]])
        yy2 = np.minimum(y2[i], y2[orderb[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h
        ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

        # Boxes that overlap strongly belong to the same person.
        # NOTE: np.int was removed in NumPy 1.24; use np.int64 instead.
        inds0 = np.where(ovr > thresh)[0]
        people[person_id] = np.hstack((people[person_id], orderb[inds0 + 1])).astype(np.int64, copy=False)

        # Continue with the non-overlapping remainder.
        inds = np.where(ovr <= thresh)[0]
        orderb = orderb[inds + 1]
        person_id += 1

    det = pre_det[keep, :]

    kpss = kpss[order, :, :]

    # Average the keypoints of each person's grouped detections.
    kpss_ave = []
    for person in people:
        kpss_ave.append(np.mean(kpss[people[person], :, :], axis=0).tolist())

    if max_num > 0 and det.shape[0] > max_num:
        area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

        values = area - offset_dist_squared * 2.0  # extra weight on centering
        bindex = np.argsort(values)[::-1]
        bindex = bindex[0:max_num]

        det = det[bindex, :]
        if kpss is not None:
            kpss = kpss[bindex, :]

    # Drop the score column (det itself is not returned, kept for parity
    # with detect_retinaface).
    det = np.delete(det, 4, 1)

    return kpss_ave
def detect_scrdf(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
    """Detect faces with the SCRFD ONNX model.

    Same decode/NMS pipeline as :meth:`detect_retinaface`, but the input
    and output tensor names are discovered from the session at runtime.

    Parameters
    ----------
    img : torch.Tensor
        CUDA uint8/float CHW image tensor (3, H, W).
    max_num : int
        Maximum number of faces to keep (0 = unlimited).
    score : float
        Minimum detection confidence.
    use_landmark_detection : bool
        When True, refine keypoints with the secondary landmark detector.
    landmark_detect_mode, landmark_score, from_points
        Passed through to ``run_detect_landmark``.

    Returns
    -------
    (det, kpss) : tuple of np.ndarray
        det  : (N, 4) face bounding boxes in original-image coordinates.
        kpss : (N, 5, 2) five-point facial landmarks.
    """
    if use_landmark_detection:
        # Keep an untouched copy; img is resized/normalized below.
        img_landmark = img.clone()

    # --- Preprocess: letterbox into a 640x640 detector input -------------
    input_size = (640, 640)
    im_ratio = torch.div(img.size()[1], img.size()[2])

    model_ratio = float(input_size[1]) / input_size[0]
    if im_ratio > model_ratio:
        new_height = input_size[1]
        new_width = int(new_height / im_ratio)
    else:
        new_width = input_size[0]
        new_height = int(new_width * im_ratio)
    det_scale = torch.div(new_height, img.size()[1])

    resize = v2.Resize((new_height, new_width), antialias=True)
    img = resize(img)
    img = img.permute(1, 2, 0)

    det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
    det_img[:new_height, :new_width, :] = img

    # Switch to BGR and normalize to roughly [-1, 1].
    det_img = det_img[:, :, [2, 1, 0]]
    det_img = torch.sub(det_img, 127.5)
    det_img = torch.div(det_img, 128.0)
    det_img = det_img.permute(2, 0, 1)

    # --- Run the model via IO binding ------------------------------------
    det_img = torch.unsqueeze(det_img, 0).contiguous()
    input_name = self.scrdf_model.get_inputs()[0].name

    output_names = [o.name for o in self.scrdf_model.get_outputs()]

    io_binding = self.scrdf_model.io_binding()
    io_binding.bind_input(name=input_name, device_type='cuda', device_id=0,
                          element_type=np.float32, shape=det_img.size(),
                          buffer_ptr=det_img.data_ptr())
    for output_name in output_names:
        io_binding.bind_output(output_name, 'cuda')

    # Flush pending CUDA work, then run.
    self.syncvec.cpu()
    self.scrdf_model.run_with_iobinding(io_binding)

    net_outs = io_binding.copy_outputs_to_cpu()

    input_height = det_img.shape[2]
    input_width = det_img.shape[3]

    # --- Decode anchor-relative predictions per stride --------------------
    fmc = 3  # outputs 0..2 scores, 3..5 bboxes, 6..8 keypoints
    center_cache = {}
    scores_list = []
    bboxes_list = []
    kpss_list = []
    for idx, stride in enumerate([8, 16, 32]):
        scores = net_outs[idx]
        bbox_preds = net_outs[idx + fmc] * stride
        kps_preds = net_outs[idx + fmc * 2] * stride
        height = input_height // stride
        width = input_width // stride
        key = (height, width, stride)
        if key in center_cache:
            anchor_centers = center_cache[key]
        else:
            # Two anchors per grid cell, centered on the cell.
            anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
            anchor_centers = (anchor_centers * stride).reshape((-1, 2))
            anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
            if len(center_cache) < 100:
                center_cache[key] = anchor_centers

        pos_inds = np.where(scores >= score)[0]

        x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
        y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
        x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
        y2 = anchor_centers[:, 1] + bbox_preds[:, 3]
        bboxes = np.stack([x1, y1, x2, y2], axis=-1)

        scores_list.append(scores[pos_inds])
        bboxes_list.append(bboxes[pos_inds])

        preds = []
        for i in range(0, kps_preds.shape[1], 2):
            px = anchor_centers[:, i % 2] + kps_preds[:, i]
            py = anchor_centers[:, i % 2 + 1] + kps_preds[:, i + 1]
            preds.append(px)
            preds.append(py)
        kpss = np.stack(preds, axis=-1)
        kpss = kpss.reshape((kpss.shape[0], -1, 2))
        kpss_list.append(kpss[pos_inds])

    # --- Sort by score, map back to original image scale ------------------
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]

    det_scale = det_scale.numpy()
    bboxes = np.vstack(bboxes_list) / det_scale
    kpss = np.vstack(kpss_list) / det_scale
    pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
    pre_det = pre_det[order, :]

    # --- Greedy IoU NMS ----------------------------------------------------
    dets = pre_det
    thresh = 0.4
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scoresb = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    orderb = scoresb.argsort()[::-1]

    keep = []
    while orderb.size > 0:
        i = orderb[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[orderb[1:]])
        yy1 = np.maximum(y1[i], y1[orderb[1:]])
        xx2 = np.minimum(x2[i], x2[orderb[1:]])
        yy2 = np.minimum(y2[i], y2[orderb[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h
        ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        orderb = orderb[inds + 1]

    det = pre_det[keep, :]

    kpss = kpss[order, :, :]
    kpss = kpss[keep, :, :]

    # --- Trim to max_num, preferring large + centered faces ----------------
    if max_num > 0 and det.shape[0] > max_num:
        area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

        values = area - offset_dist_squared * 2.0  # extra weight on centering
        bindex = np.argsort(values)[::-1]
        bindex = bindex[0:max_num]

        det = det[bindex, :]
        if kpss is not None:
            kpss = kpss[bindex, :]

    score_values = det[:, 4]
    # Drop the score column; callers only want coordinates.
    det = np.delete(det, 4, 1)

    # --- Optional landmark refinement --------------------------------------
    if use_landmark_detection and len(kpss) > 0:
        for i in range(kpss.shape[0]):
            landmark_kpss, landmark_scores = self.run_detect_landmark(
                img_landmark, det[i], kpss[i], landmark_detect_mode, landmark_score, from_points)
            if len(landmark_kpss) > 0:
                if len(landmark_scores) > 0:
                    # Only accept refined landmarks when the landmark
                    # detector is more confident than the face detector.
                    if np.mean(landmark_scores) > np.mean(score_values[i]):
                        kpss[i] = landmark_kpss
                else:
                    kpss[i] = landmark_kpss

    return det, kpss
def detect_yoloface(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
    """Detect faces with the YOLO-face ONNX model.

    Parameters
    ----------
    img : torch.Tensor
        CUDA uint8 CHW image tensor (3, H, W).
    max_num : int
        Maximum number of faces to return.
    score : float
        Minimum detection confidence.
    use_landmark_detection : bool
        When True, refine keypoints with the secondary landmark detector.
    landmark_detect_mode, landmark_score, from_points
        Passed through to ``run_detect_landmark``.

    Returns
    -------
    (bboxes, kpss) : tuple of np.ndarray
        bboxes : (N, 4) boxes as [x1, y1, x2, y2] in original coordinates.
        kpss   : (N, 5, 2) five-point facial landmarks.
    """
    if use_landmark_detection:
        # Keep an untouched copy; img is padded/resized below.
        img_landmark = img.clone()

    # --- Preprocess: pad to square, scale to 640x640, [0, 1] float --------
    height = img.size(dim=1)
    width = img.size(dim=2)
    length = max((height, width))

    image = torch.zeros((length, length, 3), dtype=torch.uint8, device='cuda')
    img = img.permute(1, 2, 0)
    image[0:height, 0:width] = img
    scale = length / 640.0  # maps 640-space coordinates back to the image
    image = torch.div(image, 255.0)

    t640 = v2.Resize((640, 640), antialias=False)
    image = image.permute(2, 0, 1)
    image = t640(image)

    image = torch.unsqueeze(image, 0).contiguous()

    # --- Run the model via IO binding ------------------------------------
    io_binding = self.yoloface_model.io_binding()
    io_binding.bind_input(name='images', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=image.size(),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output('output0', 'cuda')

    # Flush pending CUDA work, then run.
    self.syncvec.cpu()
    self.yoloface_model.run_with_iobinding(io_binding)

    net_outs = io_binding.copy_outputs_to_cpu()

    # Rows: candidate boxes; columns 0-3 box (cx, cy, w, h), 4 score,
    # 5+ keypoint triplets (x, y, visibility).
    outputs = np.squeeze(net_outs).T
    bbox_raw, score_raw, kps_raw = np.split(outputs, [4, 5], axis=1)

    bbox_list = []
    score_list = []
    kps_list = []
    keep_indices = np.where(score_raw > score)[0]

    # BUGFIX: was `keep_indices.any()`, which is False when the only kept
    # index is 0 and would silently drop that detection. Check the count.
    if keep_indices.size > 0:
        bbox_raw, kps_raw, score_raw = bbox_raw[keep_indices], kps_raw[keep_indices], score_raw[keep_indices]

        # Center-size boxes -> corner boxes, in original image scale.
        bbox_raw = bbox_raw * scale
        for bbox in bbox_raw:
            bbox_list.append(np.array([
                (bbox[0] - bbox[2] / 2), (bbox[1] - bbox[3] / 2),
                (bbox[0] + bbox[2] / 2), (bbox[1] + bbox[3] / 2)]))

        # Keypoints come as (x, y, visibility) triplets; keep only x, y.
        kps_raw = kps_raw * scale
        for kps in kps_raw:
            indexes = np.arange(0, len(kps), 3)
            temp_kps = []
            for index in indexes:
                temp_kps.append([kps[index], kps[index + 1]])
            kps_list.append(np.array(temp_kps))
        score_list = score_raw.ravel().tolist()

    result_boxes = cv2.dnn.NMSBoxes(bbox_list, score_list, 0.25, 0.45, 0.5)

    bboxes_list = []
    kpss_list = []
    for r in result_boxes:
        if r == max_num:
            break
        if use_landmark_detection and len(kps_list[r]) > 0:
            landmark_kpss, landmark_scores = self.run_detect_landmark(
                img_landmark, bbox_list[r], kps_list[r], landmark_detect_mode, landmark_score, from_points)
            if len(landmark_kpss) > 0:
                if len(landmark_scores) > 0:
                    # Only accept refined landmarks when the landmark
                    # detector is more confident than the face detector.
                    if np.mean(landmark_scores) > np.mean(score_list[r]):
                        kps_list[r] = landmark_kpss
                else:
                    kps_list[r] = landmark_kpss

        bboxes_list.append(bbox_list[r])
        kpss_list.append(kps_list[r])

    return np.array(bboxes_list), np.array(kpss_list)
def detect_yoloface2(self, image_in, max_num, score):
    """Experimental YOLO-face detector + insight106 landmark pass.

    NOTE(review): this looks like a debugging/prototype path — it writes a
    debug image to '2.jpg', contains large amounts of commented-out code, and
    effectively only handles the first NMS-kept box (see note in the loop
    below). Confirm before using in production.

    Args:
        image_in: CUDA uint8 image tensor, assumed (3, H, W) — TODO confirm.
        max_num:  maximum number of faces to keep after NMS.
        score:    detector confidence threshold.

    Returns:
        np.ndarray of shape (1, 5, 2): two insight106 landmarks (indices 38
        and 88) plus the detector's keypoints 2..4 for the first face.
    """
    img = image_in.detach().clone()

    height = img.size(dim=1)
    width = img.size(dim=2)
    length = max((height, width))

    # Letterbox into a square CUDA buffer, then normalize and resize to the
    # 640x640 YOLO input.
    image = torch.zeros((length, length, 3), dtype=torch.uint8,
                        device='cuda')
    img = img.permute(1, 2, 0)

    image[0:height, 0:width] = img
    scale = length / 640.0  # maps 640-space detections back to source pixels
    image = torch.div(image, 255.0)

    t640 = v2.Resize((640, 640), antialias=False)
    image = image.permute(2, 0, 1)
    image = t640(image)

    image = torch.unsqueeze(image, 0).contiguous()

    # Bind input/output buffers directly on the GPU for onnxruntime.
    io_binding = self.yoloface_model.io_binding()
    io_binding.bind_input(name='images', device_type='cuda', device_id=0,
                          element_type=np.float32, shape=image.size(),
                          buffer_ptr=image.data_ptr())
    io_binding.bind_output('output0', 'cuda')

    # Sync and run model
    self.syncvec.cpu()
    self.yoloface_model.run_with_iobinding(io_binding)

    net_outs = io_binding.copy_outputs_to_cpu()

    outputs = np.squeeze(net_outs).T

    # Columns: 0-3 bbox (cx, cy, w, h), 4 score, 5+ keypoints.
    bbox_raw, score_raw, kps_raw = np.split(outputs, [4, 5], axis=1)

    bbox_list = []
    score_list = []
    kps_list = []
    keep_indices = np.where(score_raw > score)[0]

    if keep_indices.any():
        bbox_raw, kps_raw, score_raw = bbox_raw[keep_indices], kps_raw[
            keep_indices], score_raw[keep_indices]
        # Convert center/size boxes to corner form (in 640-space; note that
        # unlike detect_yoloface, bbox_raw is NOT scaled here — the single
        # kept box is scaled later).
        for bbox in bbox_raw:
            bbox_list.append(np.array(
                [(bbox[0] - bbox[2] / 2), (bbox[1] - bbox[3] / 2),
                 (bbox[0] + bbox[2] / 2), (bbox[1] + bbox[3] / 2)]))
        kps_raw = kps_raw * scale

        # Keypoints come in (x, y, conf) triples; keep only x, y.
        for kps in kps_raw:
            indexes = np.arange(0, len(kps), 3)
            temp_kps = []
            for index in indexes:
                temp_kps.append([kps[index], kps[index + 1]])
            kps_list.append(np.array(temp_kps))
        score_list = score_raw.ravel().tolist()

    result_boxes = cv2.dnn.NMSBoxes(bbox_list, score_list, 0.25, 0.45, 0.5)

    result = []
    for r in result_boxes:
        if r == max_num:
            break
        # NOTE(review): this rebinds bbox_list from a list to a single box
        # array on the first iteration — a second iteration would index the
        # box array instead of the list. Effectively only one face survives.
        bbox_list = bbox_list[r]
        result.append(kps_list[r])
    bbox_list = bbox_list*scale
    # print(bbox_list)
    # print(bbox_list*scale)

    # img = image_in.detach().clone()
    # test = image_in.permute(1, 2, 0)
    # test = test.cpu().numpy()
    # cv2.imwrite('1.jpg', test)

    # b_scale = 50
    # bbox_list[0] = bbox_list[0] - b_scale
    # bbox_list[1] = bbox_list[1] - b_scale
    # bbox_list[2] = bbox_list[2] + b_scale
    # bbox_list[3] = bbox_list[3] + b_scale

    # Crop the detected face region out of the original image.
    img = image_in.detach().clone()

    img = img[:, int(bbox_list[1]):int(bbox_list[3]), int(bbox_list[0]):int(bbox_list[2])]
    # print(img.size())

    height = img.size(dim=1)
    width = img.size(dim=2)
    length = max((height, width))

    # Letterbox the crop into a 192x192 input for the landmark model.
    image = torch.zeros((length, length, 3), dtype=torch.uint8, device='cuda')
    img = img.permute(1, 2, 0)

    image[0:height, 0:width] = img
    scale = length/192
    image = torch.div(image, 255.0)

    t192 = v2.Resize((192, 192), antialias=False)
    image = image.permute(2, 0, 1)
    image = t192(image)

    # CPU copy of the full frame, used both as warp source and debug canvas.
    test = image_in.detach().clone().permute(1, 2, 0)
    test = test.cpu().numpy()

    input_mean = 0.0
    input_std = 1.0

    self.lmk_dim = 2
    self.lmk_num = 106

    bbox = bbox_list
    w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
    center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
    rotate = 0
    _scale = 192 / (max(w, h) * 1.5)
    # print('param:', img.shape, bbox, center, self.input_size, _scale, rotate)
    aimg, M = self.transform(test, center, 192, _scale, rotate)
    input_size = tuple(aimg.shape[0:2][::-1])
    # assert input_size==self.input_size
    blob = cv2.dnn.blobFromImage(aimg, 1.0 / input_std, input_size, (input_mean, input_mean, input_mean), swapRB=True)
    pred = self.insight106_model.run(['fc1'], {'data': blob})[0][0]
    # Large flat outputs are (x, y, z) triples, otherwise (x, y) pairs.
    if pred.shape[0] >= 3000:
        pred = pred.reshape((-1, 3))
    else:
        pred = pred.reshape((-1, 2))
    if self.lmk_num < pred.shape[0]:
        pred = pred[self.lmk_num * -1:, :]
    # Model output is in [-1, 1]; map to 192-pixel crop coordinates.
    pred[:, 0:2] += 1
    pred[:, 0:2] *= 96
    if pred.shape[1] == 3:
        pred[:, 2] *= (106)

    # Map crop-space landmarks back into full-frame coordinates.
    IM = cv2.invertAffineTransform(M)
    pred = self.trans_points2d(pred, IM)
    # face[self.taskname] = pred
    # if self.require_pose:
    #     P = transform.estimate_affine_matrix_3d23d(self.mean_lmk, pred)
    #     s, R, t = transform.P2sRt(P)
    #     rx, ry, rz = transform.matrix2angle(R)
    #     pose = np.array([rx, ry, rz], dtype=np.float32)
    #     face['pose'] = pose # pitch, yaw, roll
    # print(pred.shape)
    # print(pred)

    # NOTE(review): debug output — paints landmarks white and dumps the frame
    # to disk on every call. Remove for production use.
    for point in pred:
        test[int(point[1])] [int(point[0])] [0] = 255
        test[int(point[1])] [int(point[0])] [1] = 255
        test[int(point[1])] [int(point[0])] [2] = 255
    cv2.imwrite('2.jpg', test)

    # Mix insight106 eye points (38, 88 — presumably eye centers; verify)
    # with the detector's nose/mouth keypoints.
    predd = []
    predd.append(pred[38])
    predd.append(pred[88])
    # predd.append(pred[86])
    # predd.append(pred[52])
    # predd.append(pred[61])

    predd.append(kps_list[0][2])
    predd.append(kps_list[0][3])
    predd.append(kps_list[0][4])

    # for point in predd:
    #     test[int(point[1])] [int(point[0])] [0] = 255
    #     test[int(point[1])] [int(point[0])] [1] = 255
    #     test[int(point[1])] [int(point[0])] [2] = 255
    # cv2.imwrite('2.jpg', test)
    preddd=[]
    preddd.append(predd)
    return np.array(preddd)
|
| 1257 |
+
def transform(self, data, center, output_size, scale, rotation):
    """Crop a rotated, scaled square window around *center* from *data*.

    Builds a similarity transform (scale -> recenter -> rotate -> shift to
    the crop middle), then warps with OpenCV.

    Returns:
        (cropped, M): the output_size x output_size crop and the 2x3 affine
        matrix that maps source coordinates into crop coordinates.
    """
    theta = float(rotation) * np.pi / 180.0

    # Scaled position of the requested center — this point must land at the
    # middle of the output crop.
    center_x = center[0] * scale
    center_y = center[1] * scale

    # Compose: scale about the origin, move the scaled center to the origin,
    # rotate, then translate the origin to the crop's midpoint.
    chain = (trans.SimilarityTransform(scale=scale)
             + trans.SimilarityTransform(translation=(-1 * center_x, -1 * center_y))
             + trans.SimilarityTransform(rotation=theta)
             + trans.SimilarityTransform(translation=(output_size / 2, output_size / 2)))

    M = chain.params[0:2]
    cropped = cv2.warpAffine(data, M, (output_size, output_size), borderValue=0.0)
    return cropped, M
|
| 1274 |
+
|
| 1275 |
+
def trans_points2d(self, pts, M):
    """Apply a 2x3 affine transform to an array of 2D points.

    Replaces the previous per-point Python loop with a single homogeneous
    matrix multiplication. Also fixes a latent crash: the old loop assigned
    a length-2 result into each output row, which raised a ValueError for
    (N, 3) input; extra columns (e.g. a z/score column) are now passed
    through unchanged.

    Args:
        pts: array-like of shape (N, 2) or (N, k>=2); only the first two
             columns are transformed.
        M:   2x3 affine matrix (as produced by transform()/cv2).

    Returns:
        np.float32 array with the same shape as *pts*.
    """
    pts = np.asarray(pts)
    new_pts = np.zeros(shape=pts.shape, dtype=np.float32)

    # Homogeneous coordinates [x, y, 1] let the whole batch be mapped with
    # one matmul instead of N dot products.
    homo = np.ones((pts.shape[0], 3), dtype=np.float32)
    homo[:, 0:2] = pts[:, 0:2]
    new_pts[:, 0:2] = homo @ np.asarray(M, dtype=np.float32).T

    # Preserve any trailing columns verbatim.
    if pts.shape[1] > 2:
        new_pts[:, 2:] = pts[:, 2:]

    return new_pts
|
| 1285 |
+
|
| 1286 |
+
# image = torch.unsqueeze(image, 0).contiguous()
|
| 1287 |
+
#
|
| 1288 |
+
# io_binding = self.insight106_model.io_binding()
|
| 1289 |
+
# io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=image.size(), buffer_ptr=image.data_ptr())
|
| 1290 |
+
# io_binding.bind_output('fc1', 'cuda')
|
| 1291 |
+
#
|
| 1292 |
+
# # Sync and run model
|
| 1293 |
+
# self.syncvec.cpu()
|
| 1294 |
+
# self.insight106_model.run_with_iobinding(io_binding)
|
| 1295 |
+
#
|
| 1296 |
+
# net_outs = io_binding.copy_outputs_to_cpu()
|
| 1297 |
+
# print(net_outs)
|
| 1298 |
+
# net_outs[0][0] = net_outs[0][0]+1.
|
| 1299 |
+
# net_outs[0][0] = net_outs[0][0]/2.
|
| 1300 |
+
# net_outs[0][0] = net_outs[0][0]*96
|
| 1301 |
+
#
|
| 1302 |
+
# # net_outs[0] = net_outs[0]*scale
|
| 1303 |
+
# # print(net_outs)
|
| 1304 |
+
# test=test*255.0
|
| 1305 |
+
# for i in range(0, len(net_outs[0][0]), 2):
|
| 1306 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [0] = 255
|
| 1307 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [1] = 255
|
| 1308 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [2] = 255
|
| 1309 |
+
# cv2.imwrite('2.jpg', test)
|
| 1310 |
+
#
|
| 1311 |
+
# return np.array(result)
|
| 1312 |
+
def detect_yunet(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
    """Detect faces with the YuNet ONNX model and return boxes + keypoints.

    Pipeline: aspect-preserving resize into a 640x640 letterboxed BGR input,
    GPU-bound onnxruntime inference, anchor decoding over strides 8/16/32,
    greedy IoU NMS (threshold 0.4), then optional refinement of the 5-point
    keypoints via run_detect_landmark().

    Args:
        img: CUDA uint8 image tensor, assumed (3, H, W) RGB — TODO confirm.
        max_num: maximum number of faces returned.
        score: confidence threshold applied to cls*obj scores.
        use_landmark_detection / landmark_detect_mode / landmark_score /
        from_points: forwarded to run_detect_landmark() for kps refinement.

    Returns:
        (bboxes, kpss): np arrays of [x1, y1, x2, y2, score] rows and
        (N, 5, 2) keypoints, in original-image coordinates.
    """
    if use_landmark_detection:
        # Keep an untouched copy: img is resized/reordered below.
        img_landmark = img.clone()

    height = img.size(dim=1)
    width = img.size(dim=2)
    input_size = (640, 640)
    # Fit the longer side into the model input while preserving aspect ratio.
    im_ratio = float(height) / width
    model_ratio = float(input_size[1]) / input_size[0]
    if im_ratio > model_ratio:
        new_height = input_size[1]
        new_width = int(new_height / im_ratio)
    else:
        new_width = input_size[0]
        new_height = int(new_width * im_ratio)
    det_scale = float(new_height) / height  # model-space -> image-space factor

    t640 = v2.Resize((new_height, new_width), antialias=False)
    img = t640(img)

    # Switch to BGR
    img = img.permute(1,2,0)
    img = img[:, :, [2,1,0]]

    # Letterbox into the full 640x640 canvas (zero padding right/bottom).
    image = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.uint8, device='cuda')
    image[:new_height, :new_width, :] = img

    image = image.permute(2, 0, 1)
    image = torch.unsqueeze(image, 0).contiguous()
    image = image.to(dtype=torch.float32)

    # Discover I/O names from the model rather than hard-coding them.
    input_name = self.yunet_model.get_inputs()[0].name
    outputs = self.yunet_model.get_outputs()
    output_names = []
    for o in outputs:
        output_names.append(o.name)

    io_binding = self.yunet_model.io_binding()
    io_binding.bind_input(name=input_name, device_type='cuda', device_id=0, element_type=np.float32, shape=image.size(), buffer_ptr=image.data_ptr())

    for i in range(len(output_names)):
        io_binding.bind_output(output_names[i], 'cuda')

    # Sync and run model
    syncvec = self.syncvec.cpu()
    self.yunet_model.run_with_iobinding(io_binding)
    net_outs = io_binding.copy_outputs_to_cpu()

    # Outputs are grouped per stride: [cls x3, obj x3, reg x3, kps x3].
    strides = [8, 16, 32]
    scores, bboxes, kpss = [], [], []
    for idx, stride in enumerate(strides):
        cls_pred = net_outs[idx].reshape(-1, 1)
        obj_pred = net_outs[idx + len(strides)].reshape(-1, 1)
        reg_pred = net_outs[idx + len(strides) * 2].reshape(-1, 4)
        kps_pred = net_outs[idx + len(strides) * 3].reshape(
            -1, 5 * 2)

        # One anchor center per grid cell, in input-pixel coordinates.
        anchor_centers = np.stack(
            np.mgrid[:(input_size[1] // stride), :(input_size[0] //
                                                   stride)][::-1],
            axis=-1)
        anchor_centers = (anchor_centers * stride).astype(
            np.float32).reshape(-1, 2)

        # Decode (dx, dy, log w, log h) regression into corner boxes.
        bbox_cxy = reg_pred[:, :2] * stride + anchor_centers[:]
        bbox_wh = np.exp(reg_pred[:, 2:]) * stride
        tl_x = (bbox_cxy[:, 0] - bbox_wh[:, 0] / 2.)
        tl_y = (bbox_cxy[:, 1] - bbox_wh[:, 1] / 2.)
        br_x = (bbox_cxy[:, 0] + bbox_wh[:, 0] / 2.)
        br_y = (bbox_cxy[:, 1] + bbox_wh[:, 1] / 2.)

        bboxes.append(np.stack([tl_x, tl_y, br_x, br_y], -1))
        # for nk in range(5):
        per_kps = np.concatenate(
            [((kps_pred[:, [2 * i, 2 * i + 1]] * stride) + anchor_centers)
             for i in range(5)],
            axis=-1)

        kpss.append(per_kps)
        scores.append(cls_pred * obj_pred)

    scores = np.concatenate(scores, axis=0).reshape(-1)
    bboxes = np.concatenate(bboxes, axis=0)
    kpss = np.concatenate(kpss, axis=0)
    score_mask = (scores > score)
    scores = scores[score_mask]
    bboxes = bboxes[score_mask]
    kpss = kpss[score_mask]

    # Undo the letterbox resize: back to original-image coordinates.
    bboxes /= det_scale
    kpss /= det_scale
    pre_det = np.hstack((bboxes, scores[:, None]))

    # Greedy hard-NMS on score-sorted boxes (IoU threshold 0.4).
    dets = pre_det
    thresh = 0.4
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scoresb = dets[:, -1]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scoresb.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    kpss = kpss[keep, :]
    bboxes = pre_det[keep, :]
    score_values = bboxes[:, 4]

    bbox_list = []
    kps_list = []
    for i in range(bboxes.shape[0]):
        if i==max_num:
            break
        box = np.array((bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]))
        bbox_list.append(box)

        if kpss is not None:
            kps = kpss[i].reshape(-1, 2)
            if use_landmark_detection and len(kps) > 0:
                # Optionally refine keypoints with the dedicated landmark
                # model; keep the refinement only when it scores better than
                # the detector (or when no scores are returned).
                landmark_kpss, landmark_scores = self.run_detect_landmark(img_landmark, box, kps, landmark_detect_mode, landmark_score, from_points)
                if len(landmark_kpss) > 0:
                    if len(landmark_scores) > 0:
                        #print(np.mean(landmark_scores))
                        #print(np.mean(score_values[i]))
                        if np.mean(landmark_scores) > np.mean(score_values[i]):
                            kps = landmark_kpss
                    else:
                        kps = landmark_kpss

            kps_list.append(kps)

    return np.array(bbox_list), np.array(kps_list)
|
| 1461 |
+
|
| 1462 |
+
def detect_face_landmark_5(self, img, bbox, det_kpss, from_points=False):
    """Detect 5 facial landmarks with the RetinaFace-style ResNet50 model.

    The face is warped to a 512x512 crop (either from the bbox or from
    existing 5-point landmarks), run through the model, and the best-scoring
    anchor's 5 decoded points are mapped back to original-image coordinates.

    Args:
        img: CUDA image tensor, assumed (3, H, W) — TODO confirm.
        bbox: [x1, y1, x2, y2]; used when from_points is False.
        det_kpss: existing 5-point landmarks; used when from_points is True.
        from_points: choose the warp source (bbox vs. landmarks).

    Returns:
        (landmarks, scores): (5, 2) array and 1-element score array, or
        ([], []) when no anchor clears the internal 0.1 score floor.
    """
    if from_points == False:
        # Square crop centered on the bbox, padded by 1.5x the longer side.
        w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
        center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
        rotate = 0
        _scale = 512.0 / (max(w, h)*1.5)
        image, M = faceutil.transform(img, center, 512, _scale, rotate)
    else:
        image, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 512, normalized=True)

    image = image.permute(1,2,0)

    # Per-channel mean subtraction (standard RetinaFace preprocessing values).
    mean = torch.tensor([104, 117, 123], dtype=torch.float32, device='cuda')
    image = torch.sub(image, mean)

    image = image.permute(2,0,1)
    image = torch.reshape(image, (1, 3, 512, 512))

    # Scale vector to turn normalized landmark output into 512-crop pixels
    # (x,y repeated for each of the 5 points).
    height, width = (512, 512)
    tmp = [width, height, width, height, width, height, width, height, width, height]
    scale1 = torch.tensor(tmp, dtype=torch.float32, device='cuda')

    # Pre-allocated GPU output buffers; 10752 anchors total.
    conf = torch.empty((1,10752,2), dtype=torch.float32, device='cuda').contiguous()
    landmarks = torch.empty((1,10752,10), dtype=torch.float32, device='cuda').contiguous()

    io_binding = self.resnet50_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
    io_binding.bind_output(name='conf', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,2), buffer_ptr=conf.data_ptr())
    io_binding.bind_output(name='landmarks', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,10), buffer_ptr=landmarks.data_ptr())

    torch.cuda.synchronize('cuda')
    self.resnet50_model.run_with_iobinding(io_binding)

    # Column 1 of conf is the face-class probability.
    scores = torch.squeeze(conf)[:, 1]
    priors = torch.tensor(self.anchors).view(-1, 4)
    priors = priors.to('cuda')

    pre = torch.squeeze(landmarks, 0)

    # Decode each of the 5 (x, y) offsets against the prior boxes
    # (0.1 is the landmark variance used by RetinaFace-style decoders).
    tmp = (priors[:, :2] + pre[:, :2] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 2:4] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 4:6] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 6:8] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 8:10] * 0.1 * priors[:, 2:])
    landmarks = torch.cat(tmp, dim=1)
    landmarks = torch.mul(landmarks, scale1)

    landmarks = landmarks.cpu().numpy()

    # ignore low scores
    score=.1
    inds = torch.where(scores>score)[0]
    inds = inds.cpu().numpy()
    scores = scores.cpu().numpy()

    landmarks, scores = landmarks[inds], scores[inds]

    # sort
    order = scores.argsort()[::-1]

    if len(order) > 0:
        # Keep only the single best-scoring anchor.
        landmarks = landmarks[order][0]
        scores = scores[order][0]

        # Flat [x0,y0,...,x4,y4] -> (5, 2).
        landmarks = np.array([[landmarks[i], landmarks[i + 1]] for i in range(0,10,2)])

        # Map crop-space points back into original-image coordinates.
        IM = faceutil.invertAffineTransform(M)
        landmarks = faceutil.trans_points2d(landmarks, IM)
        scores = np.array([scores])

        #faceutil.test_bbox_landmarks(img, bbox, landmarks)
        #print(scores)

        return landmarks, scores

    return [], []
|
| 1534 |
+
|
| 1535 |
+
def detect_face_landmark_68(self, img, bbox, det_kpss, convert68_5=True, from_points=False):
    """Detect 68 facial landmarks (2DFAN-style heatmap model).

    Warps the face to 256x256, runs the ONNX model, converts the 64-grid
    output back to image coordinates via the inverse warp, and derives
    per-point scores from the heatmap maxima.

    Args:
        img: CUDA image tensor, assumed (3, H, W) — TODO confirm.
        bbox: [x1, y1, x2, y2]; used when from_points is False.
        det_kpss: 5-point landmarks; used when from_points is True.
        convert68_5: when True, reduce the 68 points to the standard 5.
        from_points: choose the warp source (bbox vs. landmarks).

    Returns:
        (landmarks, scores): (68, 2) or (5, 2) points plus per-point scores.
    """
    if from_points == False:
        crop_image, affine_matrix = faceutil.warp_face_by_bounding_box_for_landmark_68(img, bbox, (256, 256))
    else:
        crop_image, affine_matrix = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 256, normalized=True)
    '''
    cv2.imshow('image', crop_image.permute(1, 2, 0).to('cpu').numpy())
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    '''
    # Normalize to [0, 1] float and add the batch dimension.
    crop_image = crop_image.to(dtype=torch.float32)
    crop_image = torch.div(crop_image, 255.0)
    crop_image = torch.unsqueeze(crop_image, 0).contiguous()

    io_binding = self.face_landmark_68_model.io_binding()
    io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=crop_image.size(), buffer_ptr=crop_image.data_ptr())

    io_binding.bind_output('landmarks_xyscore', 'cuda')
    io_binding.bind_output('heatmaps', 'cuda')

    # Sync and run model
    syncvec = self.syncvec.cpu()
    self.face_landmark_68_model.run_with_iobinding(io_binding)
    net_outs = io_binding.copy_outputs_to_cpu()
    face_landmark_68 = net_outs[0]
    face_heatmap = net_outs[1]

    # Model emits coordinates on a 64-cell grid; rescale to the 256 crop,
    # then undo the crop warp to get original-image coordinates.
    face_landmark_68 = face_landmark_68[:, :, :2][0] / 64.0
    face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256.0
    face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix))

    face_landmark_68 = face_landmark_68.reshape(-1, 2)
    # Heatmap peak value per landmark serves as its confidence score.
    face_landmark_68_score = np.amax(face_heatmap, axis = (2, 3))
    face_landmark_68_score = face_landmark_68_score.reshape(-1, 1)

    if convert68_5:
        face_landmark_68, face_landmark_68_score = faceutil.convert_face_landmark_68_to_5(face_landmark_68, face_landmark_68_score)

    #faceutil.test_bbox_landmarks(img, bbox, face_landmark_68)

    return face_landmark_68, face_landmark_68_score
|
| 1576 |
+
|
| 1577 |
+
def detect_face_landmark_3d68(self, img, bbox, det_kpss, convert68_5=True, from_points=False):
    """Detect 68 landmarks with the insightface 3D68 model (2D output only).

    Warps the face to 192x192, runs the 'fc1' head, rescales the normalized
    prediction to crop pixels, and maps it back to image space. The z
    coordinate is computed but discarded (see note below).

    Args:
        img: CUDA image tensor, assumed (3, H, W) — TODO confirm.
        bbox: [x1, y1, x2, y2]; used when from_points is False.
        det_kpss: 5-point landmarks; used when from_points is True.
        convert68_5: when True, reduce the 68 points to the standard 5.
        from_points: choose the warp source (bbox vs. landmarks).

    Returns:
        (landmarks, []): points only; this model yields no per-point scores.
    """
    if from_points == False:
        # Square crop centered on the bbox, padded by 1.5x the longer side.
        w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
        center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
        rotate = 0
        _scale = 192 / (max(w, h)*1.5)
        #print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
        aimg, M = faceutil.transform(img, center, 192, _scale, rotate)
    else:
        aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=192, normalized=True)
    '''
    cv2.imshow('image', aimg.permute(1.2.0).to('cpu').numpy())
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    '''
    aimg = torch.unsqueeze(aimg, 0).contiguous()
    aimg = aimg.to(dtype=torch.float32)
    aimg = self.normalize(aimg)
    io_binding = self.face_landmark_3d68_model.io_binding()
    io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())

    io_binding.bind_output('fc1', 'cuda')

    # Sync and run model
    syncvec = self.syncvec.cpu()
    self.face_landmark_3d68_model.run_with_iobinding(io_binding)
    pred = io_binding.copy_outputs_to_cpu()[0][0]

    # Large flat outputs are (x, y, z) triples, otherwise (x, y) pairs.
    if pred.shape[0] >= 3000:
        pred = pred.reshape((-1, 3))
    else:
        pred = pred.reshape((-1, 2))
    if 68 < pred.shape[0]:
        # Keep only the last 68 rows (the landmark block).
        pred = pred[68*-1:,:]
    # Output is in [-1, 1]; map to 192-pixel crop coordinates.
    pred[:, 0:2] += 1
    pred[:, 0:2] *= (192 // 2)
    if pred.shape[1] == 3:
        pred[:, 2] *= (192 // 2)

    #IM = cv2.invertAffineTransform(M)
    IM = faceutil.invertAffineTransform(M)
    pred = faceutil.trans_points3d(pred, IM)

    # at moment we don't use 3d points
    '''
    P = faceutil.estimate_affine_matrix_3d23d(self.mean_lmk, pred)
    s, R, t = faceutil.P2sRt(P)
    rx, ry, rz = faceutil.matrix2angle(R)
    pose = np.array( [rx, ry, rz], dtype=np.float32 ) #pitch, yaw, roll
    '''

    # convert from 3d68 to 2d68 keypoints
    landmark2d68 = np.array(pred[:, [0, 1]])

    if convert68_5:
        # convert from 68 to 5 keypoints
        landmark2d68, _ = faceutil.convert_face_landmark_68_to_5(landmark2d68, [])

    #faceutil.test_bbox_landmarks(img, bbox, landmark2d68)

    return landmark2d68, []
|
| 1638 |
+
|
| 1639 |
+
def detect_face_landmark_98(self, img, bbox, det_kpss, convert98_5=True, from_points=False):
    """Detect 98 facial landmarks (PFLD-style model).

    Warps the face to 256x256, runs the model, and maps the normalized
    output back to image coordinates. The un-warp path differs by branch:
    the bbox path uses the 'detail' tuple from the bounding-box warp, the
    landmark path uses the inverse of the affine matrix M.

    NOTE(review): M exists only in the from_points=True branch and 'detail'
    only in the from_points=False branch — the invertAffineTransform call
    must stay inside the else-branch or it raises NameError; confirm against
    the original file's indentation.

    Args:
        img: CUDA image tensor, assumed (3, H, W) — TODO confirm.
        bbox: [x1, y1, x2, y2]; used when from_points is False.
        det_kpss: 5-point landmarks; used when from_points is True.
        convert98_5: when True, reduce the 98 points to the standard 5.
        from_points: choose the warp source (bbox vs. landmarks).

    Returns:
        (landmarks, scores): (98, 2) or (5, 2) points plus per-point scores;
        empty lists when the crop is unavailable or the model returns nothing.
    """
    if from_points == False:
        crop_image, detail = faceutil.warp_face_by_bounding_box_for_landmark_98(img, bbox, (256, 256))
    else:
        crop_image, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=256, normalized=True)
    #crop_image2 = crop_image.clone()
    h, w = (crop_image.size(dim=1), crop_image.size(dim=2))
    '''
    cv2.imshow('image', crop_image.permute(1, 2, 0).to('cpu').numpy())
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    '''
    landmark = []
    landmark_score = []
    if crop_image is not None:
        # Normalize to [0, 1] float and add the batch dimension.
        crop_image = crop_image.to(dtype=torch.float32)
        crop_image = torch.div(crop_image, 255.0)
        crop_image = torch.unsqueeze(crop_image, 0).contiguous()

        io_binding = self.face_landmark_98_model.io_binding()
        io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=crop_image.size(), buffer_ptr=crop_image.data_ptr())

        io_binding.bind_output('landmarks_xyscore', 'cuda')

        # Sync and run model
        syncvec = self.syncvec.cpu()
        self.face_landmark_98_model.run_with_iobinding(io_binding)
        landmarks_xyscore = io_binding.copy_outputs_to_cpu()[0]

        if len(landmarks_xyscore) > 0:
            for one_face_landmarks in landmarks_xyscore:
                # Column 2 is the per-point score; columns 0-1 are x, y.
                landmark_score = one_face_landmarks[:, [2]].reshape(-1)
                landmark = one_face_landmarks[:, [0, 1]].reshape(-1,2)

                ##recorver, and grouped as [98,2]
                if from_points == False:
                    # Undo the bounding-box warp via its size/offset details.
                    landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3] - detail[4]
                    landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2] - detail[4]
                else:
                    # Scale normalized output to crop pixels, then invert
                    # the landmark-based warp.
                    landmark[:, 0] = landmark[:, 0] * w
                    landmark[:, 1] = landmark[:, 1] * h
                    #lmk = landmark.copy()
                    #lmk_score = landmark_score.copy()

                    #IM = cv2.invertAffineTransform(M)
                    IM = faceutil.invertAffineTransform(M)
                    landmark = faceutil.trans_points2d(landmark, IM)

                if convert98_5:
                    landmark, landmark_score = faceutil.convert_face_landmark_98_to_5(landmark, landmark_score)
                    #lmk, lmk_score = faceutil.convert_face_landmark_98_to_5(lmk, lmk_score)

                #faceutil.test_bbox_landmarks(crop_image2, [], lmk)
                #faceutil.test_bbox_landmarks(img, bbox, landmark)
                #faceutil.test_bbox_landmarks(img, bbox, det_kpss)

    return landmark, landmark_score
|
| 1696 |
+
|
| 1697 |
+
def detect_face_landmark_106(self, img, bbox, det_kpss, convert106_5=True, from_points=False):
    """Detect 106 facial landmarks (insightface 2d106 model).

    Warps the face to 192x192, runs the 'fc1' head, rescales the normalized
    prediction to crop pixels, and maps it back to image space.

    Args:
        img: CUDA image tensor, assumed (3, H, W) — TODO confirm.
        bbox: [x1, y1, x2, y2]; used when from_points is False.
        det_kpss: 5-point landmarks; used when from_points is True.
        convert106_5: when True, reduce the 106 points to the standard 5.
        from_points: choose the warp source (bbox vs. landmarks).

    Returns:
        (landmarks, []): points only; this model yields no per-point scores.
    """
    if from_points == False:
        # Square crop centered on the bbox, padded by 1.5x the longer side.
        w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
        center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
        rotate = 0
        _scale = 192 / (max(w, h)*1.5)
        #print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
        aimg, M = faceutil.transform(img, center, 192, _scale, rotate)
    else:
        aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=192, normalized=True)
    '''
    cv2.imshow('image', aimg.permute(1.2.0).to('cpu').numpy())
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    '''
    aimg = torch.unsqueeze(aimg, 0).contiguous()
    aimg = aimg.to(dtype=torch.float32)
    aimg = self.normalize(aimg)
    io_binding = self.face_landmark_106_model.io_binding()
    io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())

    io_binding.bind_output('fc1', 'cuda')

    # Sync and run model
    syncvec = self.syncvec.cpu()
    self.face_landmark_106_model.run_with_iobinding(io_binding)
    pred = io_binding.copy_outputs_to_cpu()[0][0]

    # Large flat outputs are (x, y, z) triples, otherwise (x, y) pairs.
    if pred.shape[0] >= 3000:
        pred = pred.reshape((-1, 3))
    else:
        pred = pred.reshape((-1, 2))

    if 106 < pred.shape[0]:
        # Keep only the last 106 rows (the landmark block).
        pred = pred[106*-1:,:]

    # Output is in [-1, 1]; map to 192-pixel crop coordinates.
    pred[:, 0:2] += 1
    pred[:, 0:2] *= (192 // 2)
    if pred.shape[1] == 3:
        pred[:, 2] *= (192 // 2)

    #IM = cv2.invertAffineTransform(M)
    IM = faceutil.invertAffineTransform(M)
    pred = faceutil.trans_points(pred, IM)

    if pred is not None:
        if convert106_5:
            # convert from 106 to 5 keypoints
            pred = faceutil.convert_face_landmark_106_to_5(pred)

    #faceutil.test_bbox_landmarks(img, bbox, pred)

    return pred, []
|
| 1750 |
+
|
| 1751 |
+
    def detect_face_landmark_478(self, img, bbox, det_kpss, convert478_5=True, from_points=False):
        """Run the MediaPipe-style 478-point face-landmark ONNX model on one face.

        Args:
            img: source image tensor on CUDA (CHW uint8 layout assumed — TODO confirm).
            bbox: face bounding box [x1, y1, x2, y2]; used only when from_points is False.
            det_kpss: 5-point detector keypoints; used only when from_points is True.
            convert478_5: if True, reduce the 478 landmarks down to the standard 5 points.
            from_points: choose the alignment source (bbox-center crop vs. 5-point warp).

        Returns:
            (landmark, []) — landmarks mapped back to original image coordinates;
            the score list is intentionally returned empty (see commented-out return).
        """
        if from_points == False:
            # Align from the bounding box: crop a 256x256 patch centered on the box,
            # scaled so the face (with 1.5x margin) fills the crop.
            w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
            center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
            rotate = 0
            _scale = 256.0 / (max(w, h)*1.5)
            #print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
            aimg, M = faceutil.transform(img, center, 256, _scale, rotate)
        else:
            # Align from the detector's 5 keypoints instead.
            aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 256, normalized=False)
        #aimg2 = aimg.clone()
        '''
        cv2.imshow('image', aimg.permute(1,2,0).to('cpu').numpy())
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        '''
        # NCHW float32 in [0, 1], as the landmark model expects.
        aimg = torch.unsqueeze(aimg, 0).contiguous()
        aimg = aimg.to(dtype=torch.float32)
        aimg = torch.div(aimg, 255.0)
        # Bind the CUDA tensor directly to the ONNX session (zero-copy input).
        io_binding = self.face_landmark_478_model.io_binding()
        io_binding.bind_input(name='input_12', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())

        io_binding.bind_output('Identity', 'cuda')
        io_binding.bind_output('Identity_1', 'cuda')
        io_binding.bind_output('Identity_2', 'cuda')

        # Sync and run model
        # NOTE(review): the .cpu() call appears to act as a CUDA sync barrier
        # before the ONNX run — confirm against the rest of Models.py.
        syncvec = self.syncvec.cpu()
        self.face_landmark_478_model.run_with_iobinding(io_binding)
        landmarks, faceflag, blendshapes = io_binding.copy_outputs_to_cpu()
        landmarks = landmarks.reshape( (1,478,3))

        landmark = []
        landmark_score = []
        if len(landmarks) > 0:
            for one_face_landmarks in landmarks:
                #lmk = one_face_landmarks.copy()
                landmark = one_face_landmarks
                # Map the crop-space landmarks back to original image coordinates.
                #IM = cv2.invertAffineTransform(M)
                IM = faceutil.invertAffineTransform(M)
                landmark = faceutil.trans_points3d(landmark, IM)
                '''
                P = faceutil.estimate_affine_matrix_3d23d(self.mean_lmk, landmark)
                s, R, t = faceutil.P2sRt(P)
                rx, ry, rz = faceutil.matrix2angle(R)
                pose = np.array( [rx, ry, rz], dtype=np.float32 ) #pitch, yaw, roll
                '''
                # Drop the z coordinate; keep (x, y) only.
                landmark = landmark[:, [0, 1]].reshape(-1,2)
                #lmk = lmk[:, [0, 1]].reshape(-1,2)

                #get scores
                # Feed the landmark subset into the blendshapes model to obtain scores.
                landmark_for_score = landmark[self.LandmarksSubsetIdxs]
                landmark_for_score = landmark_for_score[:, :2]
                landmark_for_score = np.expand_dims(landmark_for_score, axis=0)
                landmark_for_score = landmark_for_score.astype(np.float32)
                landmark_for_score = torch.from_numpy(landmark_for_score).to('cuda')

                io_binding_bs = self.face_blendshapes_model.io_binding()
                io_binding_bs.bind_input(name='input_points', device_type='cuda', device_id=0, element_type=np.float32, shape=tuple(landmark_for_score.shape), buffer_ptr=landmark_for_score.data_ptr())
                io_binding_bs.bind_output('output', 'cuda')

                # Sync and run model
                syncvec = self.syncvec.cpu()
                self.face_blendshapes_model.run_with_iobinding(io_binding_bs)
                landmark_score = io_binding_bs.copy_outputs_to_cpu()[0]

        if convert478_5:
            # convert from 478 to 5 keypoints
            landmark = faceutil.convert_face_landmark_478_to_5(landmark)
            #lmk = faceutil.convert_face_landmark_478_to_5(lmk)

        #faceutil.test_bbox_landmarks(aimg2, [], lmk)
        #faceutil.test_bbox_landmarks(img, bbox, landmark)
        #faceutil.test_bbox_landmarks(img, bbox, det_kpss)

        # The computed landmark_score is deliberately not returned (kept for parity
        # with the commented-out return below).
        #return landmark, landmark_score
        return landmark, []
|
| 1828 |
+
|
| 1829 |
+
    def recognize(self, img, face_kps):
        """Compute the face-recognition embedding for one aligned face.

        Warps *img* to the canonical ArcFace 112x112 template using the 5
        keypoints, normalizes it, and runs the recognition ONNX model.

        Args:
            img: image tensor on CUDA, channels-first (3, H, W) RGB — presumably
                uint8; TODO confirm against callers.
            face_kps: 5-point face keypoints in image coordinates.

        Returns:
            (embedding, cropped_image): a flattened numpy embedding vector and
            the 112x112 aligned crop (pre-normalization).
        """
        '''
        # Find transform
        dst = self.arcface_dst.copy()
        dst[:, 0] += 8.0

        tform = trans.SimilarityTransform()
        tform.estimate(face_kps, dst)

        # Transform
        img = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
        img = v2.functional.crop(img, 0,0, 128, 128)
        img = v2.Resize((112, 112), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)(img)
        '''
        # Find transform
        # Estimate the similarity transform mapping the detected keypoints onto
        # the canonical ArcFace destination points.
        tform = trans.SimilarityTransform()
        tform.estimate(face_kps, self.arcface_dst)

        # Transform
        # 57.2958 converts radians -> degrees for the torchvision affine call.
        img = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
        img = v2.functional.crop(img, 0,0, 112, 112)

        # Keep the un-normalized aligned crop to return to the caller.
        cropped_image = img
        # Switch to BGR and normalize
        img = img.permute(1,2,0) #112,112,3
        img = img[:, :, [2,1,0]]
        # Scale pixel values to [-1, 1] as ArcFace-style models expect.
        img = torch.sub(img, 127.5)
        img = torch.div(img, 127.5)
        img = img.permute(2, 0, 1) #3,112,112

        # Prepare data and find model parameters
        img = torch.unsqueeze(img, 0).contiguous()
        input_name = self.recognition_model.get_inputs()[0].name

        outputs = self.recognition_model.get_outputs()
        output_names = []
        for o in outputs:
            output_names.append(o.name)

        # Zero-copy bind of the CUDA tensor to the ONNX session.
        io_binding = self.recognition_model.io_binding()
        io_binding.bind_input(name=input_name, device_type='cuda', device_id=0, element_type=np.float32, shape=img.size(), buffer_ptr=img.data_ptr())

        for i in range(len(output_names)):
            io_binding.bind_output(output_names[i], 'cuda')

        # Sync and run model
        # NOTE(review): .cpu() appears to be used as a CUDA sync barrier — confirm.
        self.syncvec.cpu()
        self.recognition_model.run_with_iobinding(io_binding)

        # Return embedding
        return np.array(io_binding.copy_outputs_to_cpu()).flatten(), cropped_image
|
| 1880 |
+
|
| 1881 |
+
def resnet50(self, image, score=.5):
|
| 1882 |
+
if not self.resnet50_model:
|
| 1883 |
+
self.resnet50_model = onnxruntime.InferenceSession("./models/res50.onnx", providers=self.providers)
|
| 1884 |
+
|
| 1885 |
+
feature_maps = [[64, 64], [32, 32], [16, 16]]
|
| 1886 |
+
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
| 1887 |
+
steps = [8, 16, 32]
|
| 1888 |
+
image_size = 512
|
| 1889 |
+
|
| 1890 |
+
for k, f in enumerate(feature_maps):
|
| 1891 |
+
min_size_array = min_sizes[k]
|
| 1892 |
+
for i, j in product(range(f[0]), range(f[1])):
|
| 1893 |
+
for min_size in min_size_array:
|
| 1894 |
+
s_kx = min_size / image_size
|
| 1895 |
+
s_ky = min_size / image_size
|
| 1896 |
+
dense_cx = [x * steps[k] / image_size for x in [j + 0.5]]
|
| 1897 |
+
dense_cy = [y * steps[k] / image_size for y in [i + 0.5]]
|
| 1898 |
+
for cy, cx in product(dense_cy, dense_cx):
|
| 1899 |
+
self.anchors += [cx, cy, s_kx, s_ky]
|
| 1900 |
+
|
| 1901 |
+
# image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
|
| 1902 |
+
image = image.permute(1,2,0)
|
| 1903 |
+
|
| 1904 |
+
# image = image - [104, 117, 123]
|
| 1905 |
+
mean = torch.tensor([104, 117, 123], dtype=torch.float32, device='cuda')
|
| 1906 |
+
image = torch.sub(image, mean)
|
| 1907 |
+
|
| 1908 |
+
# image = image.transpose(2, 0, 1)
|
| 1909 |
+
# image = np.float32(image[np.newaxis,:,:,:])
|
| 1910 |
+
image = image.permute(2,0,1)
|
| 1911 |
+
image = torch.reshape(image, (1, 3, 512, 512))
|
| 1912 |
+
|
| 1913 |
+
height, width = (512, 512)
|
| 1914 |
+
tmp = [width, height, width, height, width, height, width, height, width, height]
|
| 1915 |
+
scale1 = torch.tensor(tmp, dtype=torch.float32, device='cuda')
|
| 1916 |
+
|
| 1917 |
+
# ort_inputs = {"input": image}
|
| 1918 |
+
conf = torch.empty((1,10752,2), dtype=torch.float32, device='cuda').contiguous()
|
| 1919 |
+
landmarks = torch.empty((1,10752,10), dtype=torch.float32, device='cuda').contiguous()
|
| 1920 |
+
|
| 1921 |
+
io_binding = self.resnet50_model.io_binding()
|
| 1922 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
| 1923 |
+
io_binding.bind_output(name='conf', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,2), buffer_ptr=conf.data_ptr())
|
| 1924 |
+
io_binding.bind_output(name='landmarks', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,10), buffer_ptr=landmarks.data_ptr())
|
| 1925 |
+
|
| 1926 |
+
# _, conf, landmarks = self.resnet_model.run(None, ort_inputs)
|
| 1927 |
+
torch.cuda.synchronize('cuda')
|
| 1928 |
+
self.resnet50_model.run_with_iobinding(io_binding)
|
| 1929 |
+
|
| 1930 |
+
# conf = torch.from_numpy(conf)
|
| 1931 |
+
# scores = conf.squeeze(0).numpy()[:, 1]
|
| 1932 |
+
scores = torch.squeeze(conf)[:, 1]
|
| 1933 |
+
|
| 1934 |
+
# landmarks = torch.from_numpy(landmarks)
|
| 1935 |
+
# landmarks = landmarks.to('cuda')
|
| 1936 |
+
|
| 1937 |
+
priors = torch.tensor(self.anchors).view(-1, 4)
|
| 1938 |
+
priors = priors.to('cuda')
|
| 1939 |
+
|
| 1940 |
+
# pre = landmarks.squeeze(0)
|
| 1941 |
+
pre = torch.squeeze(landmarks, 0)
|
| 1942 |
+
|
| 1943 |
+
tmp = (priors[:, :2] + pre[:, :2] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 2:4] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 4:6] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 6:8] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 8:10] * 0.1 * priors[:, 2:])
|
| 1944 |
+
landmarks = torch.cat(tmp, dim=1)
|
| 1945 |
+
# landmarks = landmarks * scale1
|
| 1946 |
+
landmarks = torch.mul(landmarks, scale1)
|
| 1947 |
+
|
| 1948 |
+
landmarks = landmarks.cpu().numpy()
|
| 1949 |
+
|
| 1950 |
+
# ignore low scores
|
| 1951 |
+
inds = torch.where(scores>score)[0]
|
| 1952 |
+
inds = inds.cpu().numpy()
|
| 1953 |
+
scores = scores.cpu().numpy()
|
| 1954 |
+
|
| 1955 |
+
landmarks, scores = landmarks[inds], scores[inds]
|
| 1956 |
+
|
| 1957 |
+
# sort
|
| 1958 |
+
order = scores.argsort()[::-1]
|
| 1959 |
+
landmarks = landmarks[order][0]
|
| 1960 |
+
|
| 1961 |
+
return np.array([[landmarks[i], landmarks[i + 1]] for i in range(0,10,2)])
|
rope/Styles.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared tkinter style dictionaries for the Rope UI.

Every public name and every value is identical to the original module; the
repeated option dictionaries are simply produced by small private builders so
each widget style stays an independent (mutable) dict object.
"""

bg = 'black'
main = '#1A1A1A'    # not-as-dark grey
main2 = '#151515'   # dark grey
main3 = '#28282E'   # light grey

# Font shorthands shared by the styles below.
_FONT_8 = ("Segoe UI", 8)
_FONT_9 = ("Segoe UI", 9)
_FONT_10 = ("Segoe UI", 10)


def _flat_canvas(background):
    """Borderless flat canvas/frame options on the given background."""
    return {
        'bg': background,
        'bd': '0',
        'relief': 'flat',
        'highlightthickness': '0',
    }


def _flat_text(background, foreground, font):
    """Flat borderless text/button options; active background matches background."""
    return {
        'bg': background,
        'fg': foreground,
        'activebackground': background,
        'activeforeground': 'white',
        'relief': 'flat',
        'border': '0',
        'font': font,
    }


def _entry(justify):
    """Dark entry-field options with the given text justification."""
    return {
        'bg': '#1F1F1F',
        'fg': '#FFFFFF',
        'relief': 'flat',
        'border': '0',
        'width': '5',
        'justify': justify,
        'font': _FONT_9,
        'highlightthickness': '1',
        'highlightbackground': '#17181A',
    }


canvas_frame_label_1 = _flat_canvas(main2)
canvas_frame_label_2 = _flat_canvas(main2)
canvas_frame_label_3 = _flat_canvas(main)

info_label = {
    'bg': main2,
    'fg': '#BCBCBC',
    'bd': '5',
    'relief': 'flat',
    'highlightthickness': '0',
    'font': _FONT_9,
    'anchor': 'nw',
    'justify': 'left',
}

text_1 = _flat_text(main2, 'white', _FONT_9)
text_2 = _flat_text(main2, '#D0D0D0', _FONT_9)
text_3 = _flat_text(main, '#979797', _FONT_9)

option_slider_style = {
    'bg': main,
    'activebackground': main,
    'highlightcolor': 'white',
    'highlightthickness': '0',
    'relief': 'flat',
    'sliderrelief': 'flat',
    'border': '0',
    'width': '3',
    'troughcolor': '#1F1F1F',
}

entry_3 = _entry('c')
entry_2 = _entry('l')

text_selection_off_3 = _flat_text(main, '#7A7A7A', _FONT_10)
text_selection_on_3 = _flat_text(main, '#FFFFFF', _FONT_10)
text_selection_off_2 = _flat_text(main2, '#7A7A7A', _FONT_10)
text_selection_on_2 = _flat_text(main2, '#FFFFFF', _FONT_10)

parameter_switch_3 = _flat_text(main, '#FFFFFF', _FONT_10)

canvas_bg = _flat_canvas(bg)

icon = {
    'IconOn': './rope/media/OnState.png',
    'IconOff': './rope/media/OffState.png',
}

frame_style_bg = {
    'bg': bg,
    'relief': 'flat',
    'bd': '0',
}

# The button variants share one look; each gets its own dict instance so a
# caller mutating one cannot affect the others.
button_3 = _flat_text(main2, '#FFFFFF', _FONT_10)
button_2 = _flat_text(main2, '#FFFFFF', _FONT_10)
button_1 = _flat_text(main2, '#FFFFFF', _FONT_10)
button_inactive = _flat_text(main2, '#FFFFFF', _FONT_10)
button_active = _flat_text(main2, '#FFFFFF', _FONT_10)

media_button_off_3 = _flat_text(main2, '#7A7A7A', _FONT_8)
media_button_on_3 = _flat_text('#4a57ee', '#FFFFFF', _FONT_8)

ui_text_na_2 = _flat_text(main, '#7A7A7A', _FONT_9)

timeline_canvas = _flat_canvas(main)

donate_1 = {
    'bg': main,
    'fg': '#7562ee',
    'relief': 'flat',
    'border': '0',
    'font': ("Segoe UI Semibold", 10),
    'cursor': "hand2",
}

# Color reference notes (kept from the original):
# Panes
# 3:#28282E
# 2:#212126
# 1:#17181A
# preview background: #1A1A1A
# Num Fields, slider bg: #1F1F1F
# slider ball: #919191
# Borders:#090909
# Text
# On/off:#FFFFFF
# labels: #D0D0D0
# notActive: #7A7A7A
# active:#FFFFFF
# highlighted button: #B1B1B2
# Button off: #828282
# on: #FFFFFF
# hover: #b1b1b2
# off: #828282
|
rope/VideoManager.py
ADDED
|
@@ -0,0 +1,1242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module prologue: third-party imports plus process-wide inference settings.
import os
import cv2
import tkinter as tk
from PIL import Image, ImageTk
import threading
import time
import numpy as np
from skimage import transform as trans
import subprocess
from math import floor, ceil
import bisect
import onnxruntime
import torchvision
from torchvision.transforms.functional import normalize #update to v2
import torch
from torchvision import transforms
# Silence the torchvision transforms-v2 beta warning emitted on import.
torchvision.disable_beta_transforms_warning()
from torchvision.transforms import v2
# Inference only: disable autograd globally for this process.
torch.set_grad_enabled(False)
# Raise ONNX Runtime's log threshold to FATAL (4) to suppress noisy warnings.
onnxruntime.set_default_logger_severity(4)

import inspect #print(inspect.currentframe().f_back.f_code.co_name, 'resize_image')

# All tensors in this module live on the GPU.
device = 'cuda'

# Module-level lock shared by the worker threads below.
lock=threading.Lock()
|
| 27 |
+
|
| 28 |
+
class VideoManager():
|
| 29 |
+
    def __init__(self, models ):
        """Initialize playback, queue, and swap state for one VideoManager.

        Args:
            models: the shared Models instance providing the ONNX face models.
        """
        self.models = models
        # Model related
        self.swapper_model = [] # insightface swapper model
        # self.faceapp_model = [] # insight faceapp model
        self.input_names = [] # names of the inswapper.onnx inputs
        self.input_size = [] # size of the inswapper.onnx inputs

        self.output_names = [] # names of the inswapper.onnx outputs
        # Canonical ArcFace 5-point destination template (112x112 alignment).
        self.arcface_dst = np.array( [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)

        self.video_file = []

        # FFHQ-style 5-point keypoint template (512x512 coordinates).
        self.FFHQ_kps = np.array([[ 192.98138, 239.94708 ], [ 318.90277, 240.1936 ], [ 256.63416, 314.01935 ], [ 201.26117, 371.41043 ], [ 313.08905, 371.15118 ] ])

        #Video related
        self.capture = [] # cv2 video
        self.is_video_loaded = False # flag for video loaded state
        self.video_frame_total = None # length of currently loaded video
        self.play = False # flag for the play button toggle
        self.current_frame = 0 # the current frame of the video
        self.create_video = False
        self.output_video = [] # output writer handle/path — empty until recording starts
        self.file_name = []

        # Play related
        # self.set_read_threads = [] # Name of threaded function
        self.frame_timer = 0.0 # used to set the framerate during playing

        # Queues
        self.action_q = [] # queue for sending to the coordinator
        self.frame_q = [] # queue for frames that are ready for coordinator
        self.r_frame_q = [] # queue for frames that are requested by the GUI
        self.read_video_frame_q = []

        # swapping related
        # self.source_embedding = [] # array with indexed source embeddings
        self.found_faces = [] # array that maps the found faces to source faces

        self.parameters = [] # UI parameter dict, assigned by the Coordinator

        self.target_video = []

        self.fps = 1.0 # frame rate of the loaded video (set on load)
        self.temp_file = []

        self.clip_session = []

        self.start_time = []
        self.record = False # True while writing an output video
        self.output = []
        self.image = [] # currently loaded still image (RGB), if any

        self.saved_video_path = []
        self.sp = [] # subprocess handle (presumably ffmpeg) — TODO confirm
        self.timer = []
        self.fps_average = []
        self.total_thread_time = 0.0

        self.start_play_time = []
        self.start_play_frame = []

        self.rec_thread = []
        self.markers = [] # timeline markers set from the GUI
        self.is_image_loaded = False
        self.stop_marker = -1 # frame to stop playback at; -1 means no stop marker
        self.perf_test = False

        self.control = [] # GUI control-state dict, assigned by the Coordinator

        # Template for one processing-thread slot; copies live in process_qs.
        self.process_q = {
            "Thread": [],
            "FrameNumber": [],
            "ProcessedFrame": [],
            "Status": 'clear',
            "ThreadTime": []
        }
        self.process_qs = []
        # Template for one recording-thread slot; copies live in rec_qs.
        self.rec_q = {
            "Thread": [],
            "FrameNumber": [],
            "Status": 'clear'
        }
        self.rec_qs = []
|
| 124 |
+
|
| 125 |
+
def assign_found_faces(self, found_faces):
|
| 126 |
+
self.found_faces = found_faces
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
    def load_target_video( self, file ):
        """Open *file* as the active target video and push its first frame.

        Resets all playback/queue state, tells the Coordinator the new slider
        length, and queues the first frame (RGB) on r_frame_q for the GUI.
        """
        # If we already have a video loaded, release it
        if self.capture:
            self.capture.release()

        # Open file
        self.video_file = file
        self.capture = cv2.VideoCapture(file)
        self.fps = self.capture.get(cv2.CAP_PROP_FPS)

        if not self.capture.isOpened():
            print("Cannot open file: ", file)

        else:
            # Fresh video: reset playback position, queues, and face mappings.
            self.target_video = file
            self.is_video_loaded = True
            self.is_image_loaded = False
            self.video_frame_total = int(self.capture.get(cv2.CAP_PROP_FRAME_COUNT))
            self.play = False
            self.current_frame = 0
            self.frame_timer = time.time()
            self.frame_q = []
            self.r_frame_q = []
            self.found_faces = []
            # Slider indexes frames 0..total-1.
            self.add_action("set_slider_length",self.video_frame_total-1)

        # Read the first frame for immediate display, then rewind so playback
        # starts from the same frame.
        self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
        success, image = self.capture.read()

        if success:
            crop = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # RGB
            temp = [crop, False]
            self.r_frame_q.append(temp)
            self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
| 163 |
+
|
| 164 |
+
    def load_target_image(self, file):
        """Load *file* as the active still image (RGB) and queue it for the GUI.

        Releases any open video capture and resets video/queue state; leaves
        the manager in image mode (is_image_loaded=True).
        """
        if self.capture:
            self.capture.release()
        self.is_video_loaded = False
        self.play = False
        self.frame_q = []
        self.r_frame_q = []
        self.found_faces = []
        self.image = cv2.imread(file) # BGR
        self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) # RGB
        # Second element flags whether this frame should be recorded.
        temp = [self.image, False]
        self.frame_q.append(temp)

        self.is_image_loaded = True
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
## Action queue
|
| 181 |
+
def add_action(self, action, param):
    """Queue an (action, param) pair for the Coordinator to consume."""
    self.action_q.append([action, param])
|
| 185 |
+
|
| 186 |
+
def get_action_length(self):
    """Return how many actions are currently waiting in the action queue."""
    pending = self.action_q
    return len(pending)
|
| 188 |
+
|
| 189 |
+
def get_action(self):
    """Pop and return the oldest queued action (FIFO order)."""
    return self.action_q.pop(0)
|
| 193 |
+
|
| 194 |
+
## Queues for the Coordinator
|
| 195 |
+
def get_frame(self):
    """Pop and return the oldest processed frame from the frame queue."""
    return self.frame_q.pop(0)
|
| 199 |
+
|
| 200 |
+
def get_frame_length(self):
    """Return the number of frames waiting in the frame queue."""
    queue = self.frame_q
    return len(queue)
|
| 202 |
+
|
| 203 |
+
def get_requested_frame(self):
    """Pop and return the oldest entry from the requested-frame queue."""
    return self.r_frame_q.pop(0)
|
| 207 |
+
|
| 208 |
+
def get_requested_frame_length(self):
    """Return the number of entries waiting in the requested-frame queue."""
    queue = self.r_frame_q
    return len(queue)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def get_requested_video_frame(self, frame, marker=True):
    """Render a single requested frame and push it onto ``r_frame_q``.

    Called when the user scrubs/seeks. For a loaded video: stops
    playback if running, seeks to *frame*, reads it, optionally swaps
    faces (when the GUI's SwapFacesButton is on), and queues the RGB
    result as ``[image, frame_number]``. For a loaded still image the
    same logic applies without seeking.

    :param frame: frame index to fetch (coerced with int()).
    :param marker: forwarded to swap_video as its ``use_markers`` flag;
                   still images always pass False.
    """
    temp = []
    if self.is_video_loaded:

        # Seeking while playing would race the worker threads — stop first.
        if self.play == True:
            self.play_video("stop")
            self.process_qs = []

        self.current_frame = int(frame)

        self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
        success, target_image = self.capture.read() #BGR

        if success:
            target_image = cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB) #RGB
            if not self.control['SwapFacesButton']:
                temp = [target_image, self.current_frame] #temp = RGB
            else:
                temp = [self.swap_video(target_image, self.current_frame, marker), self.current_frame] # temp = RGB

            self.r_frame_q.append(temp)

    elif self.is_image_loaded:
        if not self.control['SwapFacesButton']:
            temp = [self.image, self.current_frame] # image = RGB

        else:
            # Still images never use timeline markers.
            temp = [self.swap_video(self.image, self.current_frame, False), self.current_frame] # image = RGB

        self.r_frame_q.append(temp)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def find_lowest_frame(self, queues):
    """Locate the worker-queue entry holding the smallest frame number.

    Entries whose 'FrameNumber' is [] (idle slots) are skipped. Returns
    a (index, frame_number) pair; (-1, 999999999) when every slot is idle.
    """
    best_index, best_frame = -1, 999999999
    for position, entry in enumerate(queues):
        candidate = entry['FrameNumber']
        if candidate != [] and candidate < best_frame:
            best_frame = candidate
            best_index = position
    return best_index, best_frame
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def play_video(self, command):
    """Start, stop, or record playback depending on *command*.

    Commands:
      - "play":          start threaded playback; optionally spawn an
                         ffplay subprocess for audio and re-sync the
                         current frame to where the audio actually started.
      - "stop":          stop playback (notifies the GUI via the action
                         queue), rewind to just before the lowest frame
                         still in flight, kill audio, free CUDA cache.
      - "stop_from_gui": same as "stop" but without queueing the
                         "stop_play" action (the GUI initiated it).
      - "record":        start playback while piping processed frames to
                         an FFMPEG subprocess or an OpenCV VideoWriter,
                         depending on parameters['RecordTypeTextSel'].

    Fix applied: the original contained a duplicated
    ``frame_width = int(self.capture.get(3))`` statement in the
    "record" branch; the redundant copy is removed.
    """
    if command == "play":
        # Initialization
        self.play = True
        self.fps_average = []
        self.process_qs = []
        self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
        self.frame_timer = time.time()

        # Create reusable queue based on number of threads
        for i in range(self.parameters['ThreadsSlider']):
            new_process_q = self.process_q.copy()
            self.process_qs.append(new_process_q)

        # Start up audio if requested
        if self.control['AudioButton']:
            seek_time = (self.current_frame)/self.fps
            args = ["ffplay",
                    '-vn',
                    '-ss', str(seek_time),
                    '-nodisp',
                    '-stats',
                    '-loglevel', 'quiet',
                    '-sync', 'audio',
                    self.video_file]

            self.audio_sp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

            # Parse the console to find where the audio started.
            # ffplay's stats line begins with the elapsed time; until the
            # stream is ready it prints "nan".
            # NOTE(review): the literal below may have lost leading
            # whitespace in transit (temp[:7] is a 7-byte slice) — confirm
            # against the original source.
            while True:
                temp = self.audio_sp.stdout.read(69)
                if temp[:7] != b' nan':
                    sought_time = float(temp[:7])
                    self.current_frame = int(self.fps*sought_time)

                    self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)

                    break

    elif command == "stop":
        self.play = False
        self.add_action("stop_play", True)

        index, min_frame = self.find_lowest_frame(self.process_qs)

        if index != -1:
            # Rewind to just before the lowest unfinished frame so nothing is skipped.
            self.current_frame = min_frame-1

        if self.control['AudioButton']:
            self.audio_sp.terminate()

        torch.cuda.empty_cache()

    elif command=='stop_from_gui':
        self.play = False

        # Find the lowest frame in the current render queue and set the current frame to the one before it
        index, min_frame = self.find_lowest_frame(self.process_qs)
        if index != -1:
            self.current_frame = min_frame-1

        if self.control['AudioButton']:
            self.audio_sp.terminate()

        torch.cuda.empty_cache()

    elif command == "record":
        self.record = True
        self.play = True
        self.total_thread_time = 0.0
        self.process_qs = []
        self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)

        for i in range(self.parameters['ThreadsSlider']):
            new_process_q = self.process_q.copy()
            self.process_qs.append(new_process_q)

        # Initialize
        self.timer = time.time()
        # 3/4 are cv2.CAP_PROP_FRAME_WIDTH / cv2.CAP_PROP_FRAME_HEIGHT
        frame_width = int(self.capture.get(3))
        frame_height = int(self.capture.get(4))

        self.start_time = float(self.capture.get(cv2.CAP_PROP_POS_FRAMES) / float(self.fps))

        # Build "<basename>_<unixtime>" output paths; temp file carries the
        # original extension and holds video-only output until audio is muxed in.
        self.file_name = os.path.splitext(os.path.basename(self.target_video))
        base_filename = self.file_name[0]+"_"+str(time.time())[:10]
        self.output = os.path.join(self.saved_video_path, base_filename)
        self.temp_file = self.output+"_temp"+self.file_name[1]

        if self.parameters['RecordTypeTextSel']=='FFMPEG':
            args = ["ffmpeg",
                    '-hide_banner',
                    '-loglevel', 'error',
                    "-an",
                    "-r", str(self.fps),
                    "-i", "pipe:",
                    "-vf", "format=yuvj420p",
                    "-c:v", "libx264",
                    "-crf", str(self.parameters['VideoQualSlider']),
                    "-r", str(self.fps),
                    "-s", str(frame_width)+"x"+str(frame_height),
                    self.temp_file]

            self.sp = subprocess.Popen(args, stdin=subprocess.PIPE)

        elif self.parameters['RecordTypeTextSel']=='OPENCV':
            size = (frame_width, frame_height)
            self.sp = cv2.VideoWriter(self.temp_file, cv2.VideoWriter_fourcc(*'mp4v') , self.fps, size)
|
| 375 |
+
|
| 376 |
+
# @profile
|
| 377 |
+
def process(self):
    """Main pump, called repeatedly by the Coordinator.

    Two responsibilities:
      1. While playing, hand out frame numbers to idle worker-queue
         slots, each of which spawns a thread_video_read thread.
      2. Drain finished slots in frame order — either into ``frame_q``
         for display (paced to the video fps), or, while recording,
         straight into the FFMPEG/OpenCV writer; when the last frame (or
         the stop marker) is written, finalize the file and mux audio.
    """
    # NOTE(review): process_qs_len is never used in this method.
    process_qs_len = range(len(self.process_qs))

    # Add threads to Queue
    if self.play == True and self.is_video_loaded == True:
        for item in self.process_qs:
            # Hand the next frame number to the first idle slot.
            if item['Status'] == 'clear' and self.current_frame < self.video_frame_total:
                # NOTE(review): Thread.start() returns None, so item['Thread']
                # is always None here — the handle is not retained.
                item['Thread'] = threading.Thread(target=self.thread_video_read, args = [self.current_frame]).start()
                item['FrameNumber'] = self.current_frame
                item['Status'] = 'started'
                item['ThreadTime'] = time.time()

                self.current_frame += 1
                break

    else:
        self.play = False

    # Always be emptying the queues
    time_diff = time.time() - self.frame_timer

    # Display path: release at most one frame per 1/fps wall-clock interval.
    if not self.record and time_diff >= 1.0/float(self.fps) and self.play:

        index, min_frame = self.find_lowest_frame(self.process_qs)

        if index != -1:
            # Frames must leave in order, so only the lowest frame is eligible.
            if self.process_qs[index]['Status'] == 'finished':
                temp = [self.process_qs[index]['ProcessedFrame'], self.process_qs[index]['FrameNumber']]
                self.frame_q.append(temp)

                # Report fps, other data
                self.fps_average.append(1.0/time_diff)
                if len(self.fps_average) >= floor(self.fps):
                    fps = round(np.average(self.fps_average), 2)
                    # NOTE(review): msg is built but never emitted here.
                    msg = "%s fps, %s process time" % (fps, round(self.process_qs[index]['ThreadTime'], 4))
                    self.fps_average = []

                # Stop at end of video or at a user-set stop marker.
                if self.process_qs[index]['FrameNumber'] >= self.video_frame_total-1 or self.process_qs[index]['FrameNumber'] == self.stop_marker:
                    self.play_video('stop')

                # Recycle the slot.
                self.process_qs[index]['Status'] = 'clear'
                self.process_qs[index]['Thread'] = []
                self.process_qs[index]['FrameNumber'] = []
                self.process_qs[index]['ThreadTime'] = []
                # Advance by the nominal frame period (not wall clock) to keep pace.
                self.frame_timer += 1.0/self.fps

    # Recording path: write frames as fast as they finish, in order.
    elif self.record:

        index, min_frame = self.find_lowest_frame(self.process_qs)

        if index != -1:

            # If the swapper thread has finished generating a frame
            if self.process_qs[index]['Status'] == 'finished':
                image = self.process_qs[index]['ProcessedFrame']

                if self.parameters['RecordTypeTextSel']=='FFMPEG':
                    # Pipe the frame to ffmpeg's stdin as BMP.
                    pil_image = Image.fromarray(image)
                    pil_image.save(self.sp.stdin, 'BMP')

                elif self.parameters['RecordTypeTextSel']=='OPENCV':
                    # VideoWriter expects BGR; frames are RGB internally.
                    self.sp.write(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                # Also queue for on-screen preview while recording.
                temp = [image, self.process_qs[index]['FrameNumber']]
                self.frame_q.append(temp)

                # Close video and process
                if self.process_qs[index]['FrameNumber'] >= self.video_frame_total-1 or self.process_qs[index]['FrameNumber'] == self.stop_marker or self.play == False:
                    self.play_video("stop")
                    stop_time = float(self.capture.get(cv2.CAP_PROP_POS_FRAMES) / float(self.fps))
                    if stop_time == 0:
                        stop_time = float(self.video_frame_total) / float(self.fps)

                    if self.parameters['RecordTypeTextSel']=='FFMPEG':
                        self.sp.stdin.close()
                        self.sp.wait()
                    elif self.parameters['RecordTypeTextSel']=='OPENCV':
                        self.sp.release()

                    # Mux the original file's audio into the rendered video.
                    orig_file = self.target_video
                    final_file = self.output+self.file_name[1]
                    print("adding audio...")
                    args = ["ffmpeg",
                            '-hide_banner',
                            '-loglevel', 'error',
                            "-i", self.temp_file,
                            "-ss", str(self.start_time), "-to", str(stop_time), "-i", orig_file,
                            "-c", "copy", # may be c:v
                            "-map", "0:v:0", "-map", "1:a:0?",
                            "-shortest",
                            final_file]

                    four = subprocess.run(args)
                    os.remove(self.temp_file)

                    timef= time.time() - self.timer
                    self.record = False
                    print('Video saved as:', final_file)
                    msg = "Total time: %s s." % (round(timef,1))
                    print(msg)

                # Recycle the slot.
                self.total_thread_time = []
                self.process_qs[index]['Status'] = 'clear'
                self.process_qs[index]['FrameNumber'] = []
                self.process_qs[index]['Thread'] = []
                self.frame_timer = time.time()
|
| 484 |
+
# @profile
|
| 485 |
+
def thread_video_read(self, frame_number):
    """Worker-thread body: read one frame, optionally swap it, store result.

    ``lock`` is a module-level lock (defined elsewhere in this file); it
    serializes ``capture.read()`` so concurrent workers pull sequential
    frames. The frame is converted to RGB, face-swapped when the GUI's
    SwapFacesButton is on, then written back into the worker-queue slot
    whose 'FrameNumber' matches, marking it 'finished' and recording the
    elapsed processing time.
    """
    with lock:
        success, target_image = self.capture.read()

    if success:
        target_image = cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB)
        if not self.control['SwapFacesButton']:
            temp = [target_image, frame_number]

        else:
            temp = [self.swap_video(target_image, frame_number, True), frame_number]

        # Publish the result into the slot that was assigned this frame.
        for item in self.process_qs:
            if item['FrameNumber'] == frame_number:
                item['ProcessedFrame'] = temp[0]
                item['Status'] = 'finished'
                # ThreadTime was set to the start timestamp; convert to elapsed.
                item['ThreadTime'] = time.time() - item['ThreadTime']
                break
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
# @profile
|
| 508 |
+
def swap_video(self, target_image, frame_number, use_markers):
    """Swap all recognized faces in one RGB frame and return it as uint8.

    Pipeline: snapshot parameters (marker zones may override them) →
    upload frame to CUDA → upscale below 512px → optional rotation →
    detect faces → compute embeddings → swap every face whose embedding
    matches an assigned found face → undo rotation/scaling → optional
    landmark overlay → return HxWx3 numpy uint8.

    :param target_image: HxWx3 RGB numpy array.
    :param frame_number: frame index, used for marker lookup.
    :param use_markers: when True and markers exist, parameters are taken
                        from the marker zone containing frame_number.
    """
    # Grab a local copy of the parameters to prevent threading issues
    parameters = self.parameters.copy()
    control = self.control.copy()

    # Find out if the frame is in a marker zone and copy the parameters if true
    if self.markers and use_markers:
        temp=[]
        for i in range(len(self.markers)):
            temp.append(self.markers[i]['frame'])
        # bisect finds the marker zone whose start frame precedes frame_number.
        idx = bisect.bisect(temp, frame_number)

        parameters = self.markers[idx-1]['parameters'].copy()

    # Load frame into VRAM
    img = torch.from_numpy(target_image.astype('uint8')).to('cuda') #HxWxc
    img = img.permute(2,0,1)#cxHxW

    #Scale up frame if it is smaller than 512
    img_x = img.size()[2]
    img_y = img.size()[1]

    if img_x<512 and img_y<512:
        # if x is smaller, set x to 512
        if img_x <= img_y:
            tscale = v2.Resize((int(512*img_y/img_x), 512), antialias=True)
        else:
            tscale = v2.Resize((512, int(512*img_x/img_y)), antialias=True)

        img = tscale(img)

    elif img_x<512:
        tscale = v2.Resize((int(512*img_y/img_x), 512), antialias=True)
        img = tscale(img)

    elif img_y<512:
        tscale = v2.Resize((512, int(512*img_x/img_y)), antialias=True)
        img = tscale(img)

    # Rotate the frame
    if parameters['OrientSwitch']:
        img = v2.functional.rotate(img, angle=parameters['OrientSlider'], interpolation=v2.InterpolationMode.BILINEAR, expand=True)

    # Find all faces in frame and return a list of 5-pt kpss
    bboxes, kpss = self.func_w_test("detect", self.models.run_detect, img, parameters['DetectTypeTextSel'], max_num=20, score=parameters['DetectScoreSlider']/100.0, use_landmark_detection=parameters['LandmarksDetectionAdjSwitch'], landmark_detect_mode=parameters["LandmarksDetectTypeTextSel"], landmark_score=parameters["LandmarksDetectScoreSlider"]/100.0, from_points=parameters["LandmarksAlignModeFromPointsSwitch"])

    # Get embeddings for all faces found in the frame
    ret = []
    for face_kps in kpss:
        face_emb, _ = self.func_w_test('recognize', self.models.run_recognize, img, face_kps)
        ret.append([face_kps, face_emb])

    if ret:
        # Loop through target faces to see if they match our found face embeddings
        for fface in ret:
            for found_face in self.found_faces:
                # sim between face in video and already found face
                sim = self.findCosineDistance(fface[1], found_face["Embedding"])
                # if the face[i] in the frame matches a found face[j] AND the found face is active (not [])
                if sim>=float(parameters["ThresholdSlider"]) and found_face["SourceFaceAssignments"]:
                    s_e = found_face["AssignedEmbedding"]
                    img = self.func_w_test("swap_video", self.swap_core, img, fface[0], s_e, parameters, control)

        img = img.permute(1,2,0)
        # Undo the pre-detection rotation (skipped in mask view, whose output
        # is a side-by-side composite).
        if not control['MaskViewButton'] and parameters['OrientSwitch']:
            img = img.permute(2,0,1)
            img = transforms.functional.rotate(img, angle=-parameters['OrientSlider'], expand=True)
            img = img.permute(1,2,0)

    else:
        # No faces: just undo the rotation if one was applied.
        img = img.permute(1,2,0)
        if parameters['OrientSwitch']:
            img = img.permute(2,0,1)
            img = v2.functional.rotate(img, angle=-parameters['OrientSlider'], interpolation=v2.InterpolationMode.BILINEAR, expand=True)
            img = img.permute(1,2,0)

    if self.perf_test:
        print('------------------------')

    # Unscale small videos
    if img_x <512 or img_y < 512:
        tscale = v2.Resize((img_y, img_x), antialias=True)
        img = img.permute(2,0,1)
        img = tscale(img)
        img = img.permute(1,2,0)

    img = img.cpu().numpy()

    # Optionally paint the 5-point landmarks as cyan dots (bigger above 720p).
    if parameters["ShowLandmarksSwitch"]:
        if ret:
            if img_y <= 720:
                p = 1
            else:
                p = 2

            for face in ret:
                for kpoint in face[0]:
                    for i in range(-1, p):
                        for j in range(-1, p):
                            try:
                                img[int(kpoint[1])+i][int(kpoint[0])+j][0] = 0
                                img[int(kpoint[1])+i][int(kpoint[0])+j][1] = 255
                                img[int(kpoint[1])+i][int(kpoint[0])+j][2] = 255
                            except:
                                print("Key-points value {} exceed the image size {}.".format(kpoint, (img_x, img_y)))
                                continue

    return img.astype(np.uint8)
|
| 619 |
+
|
| 620 |
+
def findCosineDistance(self, vector1, vector2):
    """Return a similarity score in [0, 100] between two embeddings.

    Cosine distance (1 - cos(theta)) lies in [0, 2]; it is mapped
    linearly so identical vectors score 100.0, orthogonal vectors 50.0,
    and opposite vectors 0.0.

    Fix applied: a dead triple-quoted code block after the return
    statement (an unreachable alternate implementation) was removed.

    :param vector1: array-like embedding (any shape; flattened).
    :param vector2: array-like embedding (any shape; flattened).
    :return: float similarity score, 0.0 .. 100.0.
    """
    vector1 = vector1.ravel()
    vector2 = vector2.ravel()
    cos_dist = 1.0 - np.dot(vector1, vector2)/(np.linalg.norm(vector1)*np.linalg.norm(vector2)) # 2..0
    # Map distance [0, 2] -> score [100, 0].
    return 100.0-cos_dist*50.0
|
| 632 |
+
|
| 633 |
+
def func_w_test(self, name, func, *args, **argsv):
    """Invoke *func* with the given arguments, printing a timing line
    labeled *name* when performance testing is enabled, and return the
    call's result."""
    started = time.time()
    output = func(*args, **argsv)
    if self.perf_test:
        elapsed = round(time.time() - started, 5)
        print(name, elapsed, 's')
    return output
|
| 639 |
+
|
| 640 |
+
# @profile
|
| 641 |
+
def swap_core(self, img, kps, s_e, parameters, control): # img = RGB
    """Swap one face in *img* (CxHxW RGB CUDA tensor) and return the frame.

    Steps: build a similarity transform from the face's 5 keypoints to
    the (scaled) arcface reference points → extract an aligned 512px
    face crop → run the swapper model tile-wise at 128/256/512 → apply
    optional strength blending, color correction, diffing, restorer,
    occluder, face-parser and CLIP masks → blur/combine masks → paste
    the swapped region back through the inverse transform. When the
    GUI's MaskViewButton is on, a side-by-side face/mask composite is
    returned instead of the full frame.

    NOTE(review): tensors are allocated on both the literal 'cuda' and a
    module-level ``device`` — presumably the same device; confirm.
    """
    # 512 transforms
    dst = self.arcface_dst * 4.0
    dst[:,0] += 32.0

    # Change the ref points
    if parameters['FaceAdjSwitch']:
        dst[:,0] += parameters['KPSXSlider']
        dst[:,1] += parameters['KPSYSlider']
        # Scale keypoints about the crop center (255, 255).
        dst[:,0] -= 255
        dst[:,0] *= (1+parameters['KPSScaleSlider']/100)
        dst[:,0] += 255
        dst[:,1] -= 255
        dst[:,1] *= (1+parameters['KPSScaleSlider']/100)
        dst[:,1] += 255

    tform = trans.SimilarityTransform()
    tform.estimate(kps, dst)

    # Scaling Transforms
    t512 = v2.Resize((512, 512), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)
    t256 = v2.Resize((256, 256), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)
    t128 = v2.Resize((128, 128), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)

    # Grab 512 face from image and create 256 and 128 copies
    # (57.2958 converts radians to degrees for torchvision's affine).
    original_face_512 = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0), interpolation=v2.InterpolationMode.BILINEAR )
    original_face_512 = v2.functional.crop(original_face_512, 0,0, 512, 512)# 3, 512, 512
    original_face_256 = t256(original_face_512)
    original_face_128 = t128(original_face_256)

    # Identity latent for the swapper, derived from the source embedding.
    latent = torch.from_numpy(self.models.calc_swapper_latent(s_e)).float().to('cuda')

    # dim = tiles per side; the swapper itself always works on 128x128 inputs.
    dim = 1
    if parameters['SwapperTypeTextSel'] == '128':
        dim = 1
        input_face_affined = original_face_128
    elif parameters['SwapperTypeTextSel'] == '256':
        dim = 2
        input_face_affined = original_face_256
    elif parameters['SwapperTypeTextSel'] == '512':
        dim = 4
        input_face_affined = original_face_512

    # Optional Scaling # change the transform matrix
    if parameters['FaceAdjSwitch']:
        input_face_affined = v2.functional.affine(input_face_affined, 0, (0, 0), 1 + parameters['FaceScaleSlider'] / 100, 0, center=(dim*128-1, dim*128-1), interpolation=v2.InterpolationMode.BILINEAR)

    # itex = number of swapper passes (strength > 100% repeats the swap).
    itex = 1
    if parameters['StrengthSwitch']:
        itex = ceil(parameters['StrengthSlider'] / 100.)

    output_size = int(128 * dim)
    output = torch.zeros((output_size, output_size, 3), dtype=torch.float32, device='cuda')
    input_face_affined = input_face_affined.permute(1, 2, 0)
    input_face_affined = torch.div(input_face_affined, 255.0)

    # Run the swapper over interleaved 128x128 sub-grids ([j::dim, i::dim]),
    # feeding each pass's output back in as the next pass's input.
    for k in range(itex):
        for j in range(dim):
            for i in range(dim):
                input_face_disc = input_face_affined[j::dim,i::dim]
                input_face_disc = input_face_disc.permute(2, 0, 1)
                input_face_disc = torch.unsqueeze(input_face_disc, 0).contiguous()

                swapper_output = torch.empty((1,3,128,128), dtype=torch.float32, device='cuda').contiguous()
                self.models.run_swapper(input_face_disc, latent, swapper_output)

                swapper_output = torch.squeeze(swapper_output)
                swapper_output = swapper_output.permute(1, 2, 0)

                output[j::dim, i::dim] = swapper_output.clone()
        # Keep the previous pass for fractional-strength blending below.
        prev_face = input_face_affined.clone()
        input_face_affined = output.clone()
        output = torch.mul(output, 255)
        output = torch.clamp(output, 0, 255)

    output = output.permute(2, 0, 1)

    swap = t512(output)

    # Fractional strength: blend the final pass with the previous one.
    if parameters['StrengthSwitch']:
        if itex == 0:
            swap = original_face_512.clone()
        else:
            alpha = np.mod(parameters['StrengthSlider'], 100)*0.01
            if alpha==0:
                alpha=1

            # Blend the images
            prev_face = torch.mul(prev_face, 255)
            prev_face = torch.clamp(prev_face, 0, 255)
            prev_face = prev_face.permute(2, 0, 1)
            prev_face = t512(prev_face)
            swap = torch.mul(swap, alpha)
            prev_face = torch.mul(prev_face, 1-alpha)
            swap = torch.add(swap, prev_face)

    # Apply color corrections
    if parameters['ColorSwitch']:
        swap = torch.unsqueeze(swap,0)
        swap = v2.functional.adjust_gamma(swap, parameters['ColorGammaSlider'], 1.0)
        swap = torch.squeeze(swap)
        swap = swap.permute(1, 2, 0).type(torch.float32)

        del_color = torch.tensor([parameters['ColorRedSlider'], parameters['ColorGreenSlider'], parameters['ColorBlueSlider']], device=device)
        swap += del_color
        swap = torch.clamp(swap, min=0., max=255.)
        swap = swap.permute(2, 0, 1).type(torch.uint8)

    # Create border mask (zeroes out a configurable margin, then blurs it)
    border_mask = torch.ones((128, 128), dtype=torch.float32, device=device)
    border_mask = torch.unsqueeze(border_mask,0)

    top = parameters['BorderTopSlider']
    left = parameters['BorderSidesSlider']
    right = 128-parameters['BorderSidesSlider']
    bottom = 128-parameters['BorderBottomSlider']

    border_mask[:, :top, :] = 0
    border_mask[:, bottom:, :] = 0
    border_mask[:, :, :left] = 0
    border_mask[:, :, right:] = 0

    gauss = transforms.GaussianBlur(parameters['BorderBlurSlider']*2+1, (parameters['BorderBlurSlider']+1)*0.2)
    border_mask = gauss(border_mask)

    # Create image mask
    swap_mask = torch.ones((128, 128), dtype=torch.float32, device=device)
    swap_mask = torch.unsqueeze(swap_mask,0)

    # Face Diffing: keep original pixels where swap differs too much.
    if parameters["DiffSwitch"]:
        mask = self.apply_fake_diff(swap, original_face_512, parameters["DiffSlider"])
        gauss = transforms.GaussianBlur(parameters['BlendSlider']*2+1, (parameters['BlendSlider']+1)*0.2)
        mask = gauss(mask.type(torch.float32))
        swap = swap*mask + original_face_512*(1-mask)

    # Restorer
    if parameters["RestorerSwitch"]:
        swap = self.func_w_test('Restorer', self.apply_restorer, swap, parameters)

    # Occluder
    if parameters["OccluderSwitch"]:
        mask = self.func_w_test('occluder', self.apply_occlusion , original_face_256, parameters["OccluderSlider"])
        mask = t128(mask)
        swap_mask = torch.mul(swap_mask, mask)

    if parameters["FaceParserSwitch"]:
        mask = self.apply_face_parser(swap, parameters["FaceParserSlider"], parameters['MouthParserSlider'])
        mask = t128(mask)
        swap_mask = torch.mul(swap_mask, mask)

    # CLIPs (lock serializes access to the shared CLIP session)
    if parameters["CLIPSwitch"]:
        with lock:
            mask = self.func_w_test('CLIP', self.apply_CLIPs, original_face_512, parameters["CLIPTextEntry"], parameters["CLIPSlider"])
        mask = cv2.resize(mask, (128,128))
        mask = torch.from_numpy(mask).to('cuda')
        swap_mask *= mask

    # Add blur to swap_mask results
    gauss = transforms.GaussianBlur(parameters['BlendSlider']*2+1, (parameters['BlendSlider']+1)*0.2)
    swap_mask = gauss(swap_mask)

    # Combine border and swap mask, scale, and apply to swap
    swap_mask = torch.mul(swap_mask, border_mask)
    swap_mask = t512(swap_mask)
    swap = torch.mul(swap, swap_mask)

    if not control['MaskViewButton']:
        # Calculate the area to be merged back to the original frame by
        # pushing the 512-crop corners through the inverse transform.
        IM512 = tform.inverse.params[0:2, :]
        corners = np.array([[0,0], [0,511], [511, 0], [511, 511]])

        x = (IM512[0][0]*corners[:,0] + IM512[0][1]*corners[:,1] + IM512[0][2])
        y = (IM512[1][0]*corners[:,0] + IM512[1][1]*corners[:,1] + IM512[1][2])

        # Clamp the bounding box to the frame.
        left = floor(np.min(x))
        if left<0:
            left=0
        top = floor(np.min(y))
        if top<0:
            top=0
        right = ceil(np.max(x))
        if right>img.shape[2]:
            right=img.shape[2]
        bottom = ceil(np.max(y))
        if bottom>img.shape[1]:
            bottom=img.shape[1]

        # Untransform the swap
        swap = v2.functional.pad(swap, (0,0,img.shape[2]-512, img.shape[1]-512))
        swap = v2.functional.affine(swap, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0,interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
        swap = swap[0:3, top:bottom, left:right]
        swap = swap.permute(1, 2, 0)

        # Untransform the swap mask
        swap_mask = v2.functional.pad(swap_mask, (0,0,img.shape[2]-512, img.shape[1]-512))
        swap_mask = v2.functional.affine(swap_mask, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
        swap_mask = swap_mask[0:1, top:bottom, left:right]
        swap_mask = swap_mask.permute(1, 2, 0)
        swap_mask = torch.sub(1, swap_mask)

        # Apply the mask to the original image areas
        img_crop = img[0:3, top:bottom, left:right]
        img_crop = img_crop.permute(1,2,0)
        img_crop = torch.mul(swap_mask,img_crop)

        # Add the cropped areas and place them back into the original image
        swap = torch.add(swap, img_crop)
        swap = swap.type(torch.uint8)
        swap = swap.permute(2,0,1)
        img[0:3, top:bottom, left:right] = swap

    else:
        # Invert swap mask
        swap_mask = torch.sub(1, swap_mask)

        # Combine preswapped face with swap
        original_face_512 = torch.mul(swap_mask, original_face_512)
        original_face_512 = torch.add(swap, original_face_512)
        original_face_512 = original_face_512.type(torch.uint8)
        original_face_512 = original_face_512.permute(1, 2, 0)

        # Uninvert and create image from swap mask
        swap_mask = torch.sub(1, swap_mask)
        swap_mask = torch.cat((swap_mask,swap_mask,swap_mask),0)
        swap_mask = swap_mask.permute(1, 2, 0)

        # Place them side by side
        img = torch.hstack([original_face_512, swap_mask*255])
        img = img.permute(2,0,1)

    return img
|
| 895 |
+
|
| 896 |
+
# @profile
|
| 897 |
+
def apply_occlusion(self, img, amount):
    """Run the occluder model on a 256px face and return a binary mask.

    The model output is thresholded at 0 into a 0/1 mask; *amount* then
    dilates (amount > 0) or erodes (amount < 0) it by |amount| passes of
    a 3x3 convolution. Returns a (1, 256, 256) float32 tensor.

    :param img: CxHxW face tensor in 0..255 (divided by 255 for the model).
    :param amount: integer-ish grow/shrink step count; 0 leaves the raw mask.
    """
    img = torch.div(img, 255)
    img = torch.unsqueeze(img, 0)
    outpred = torch.ones((256,256), dtype=torch.float32, device=device).contiguous()

    self.models.run_occluder(img, outpred)

    # Binarize: anything the model scores > 0 counts as visible face.
    outpred = torch.squeeze(outpred)
    outpred = (outpred > 0)
    outpred = torch.unsqueeze(outpred, 0).type(torch.float32)

    if amount >0:
        # Dilate: each 3x3 conv + clamp grows the mask by one pixel.
        kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)

        for i in range(int(amount)):
            outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
            outpred = torch.clamp(outpred, 0, 1)

        outpred = torch.squeeze(outpred)

    if amount <0:
        # Erode: invert, dilate the inverse, then invert back.
        outpred = torch.neg(outpred)
        outpred = torch.add(outpred, 1)
        kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)

        for i in range(int(-amount)):
            outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
            outpred = torch.clamp(outpred, 0, 1)

        outpred = torch.squeeze(outpred)
        outpred = torch.neg(outpred)
        outpred = torch.add(outpred, 1)

    outpred = torch.reshape(outpred, (1, 256, 256))
    return outpred
|
| 932 |
+
|
| 933 |
+
|
| 934 |
+
def apply_CLIPs(self, img, CLIPText, CLIPAmount):
    """Build a 352x352 binary numpy mask from comma-separated CLIPSeg text prompts.

    Returns an all-ones mask when `CLIPText` is empty. `CLIPAmount` (0-100)
    sets the sigmoid threshold used to binarize the combined prediction.
    """
    clip_mask = np.ones((352, 352))

    # CLIPSeg expects an HxWxC numpy image.
    img = img.permute(1, 2, 0).cpu().numpy()
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Resize((352, 352)),
    ])
    CLIPimg = preprocess(img).unsqueeze(0)

    if CLIPText != "":
        prompts = CLIPText.split(',')

        # One batched forward pass, one image copy per prompt.
        with torch.no_grad():
            preds = self.clip_session(CLIPimg.repeat(len(prompts), 1, 1, 1), prompts)[0]

        # Combine prompts multiplicatively: a pixel survives only if every
        # prompt leaves it unmasked.
        clip_mask = 1 - torch.sigmoid(preds[0][0])
        for prompt_idx in range(1, len(prompts)):
            clip_mask *= 1 - torch.sigmoid(preds[prompt_idx][0])
        clip_mask = clip_mask.data.cpu().numpy()

        # Binarize against the slider threshold.
        thresh = CLIPAmount / 100.0
        clip_mask[clip_mask > thresh] = 1.0
        clip_mask[clip_mask <= thresh] = 0.0
    return clip_mask
|
| 961 |
+
|
| 962 |
+
# @profile
|
| 963 |
+
def _dilate_mask(self, mask, iterations):
    """Grow a binary float mask by `iterations` pixels using repeated 3x3 box convolutions.

    `mask` is (1, 1, H, W) (or broadcastable); each pass lights any pixel with a
    lit neighbor, then clamps back to {0, 1}.
    """
    kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32, device=device)
    for _ in range(iterations):
        mask = torch.nn.functional.conv2d(mask, kernel, padding=(1, 1))
        mask = torch.clamp(mask, 0, 1)
    return mask

# @profile
def apply_face_parser(self, img, FaceAmount, MouthAmount):
    """Run the face parser on a 512x512 crop and return a (1, 512, 512) float mask.

    The result is the product of a background mask (0 outside the face, grown or
    shrunk by `FaceAmount`) and a mouth mask (0 over the mouth region, sized by
    `MouthAmount`). An amount of 0 disables the corresponding parser.

    Parser classes:
    atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
    """
    # Normalize with ImageNet statistics and add a batch dimension.
    # (The original also pre-allocated an unused (512,512) buffer here; removed as dead code.)
    img = torch.div(img, 255)
    img = v2.functional.normalize(img, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    img = torch.reshape(img, (1, 3, 512, 512))
    outpred = torch.empty((1, 19, 512, 512), dtype=torch.float32, device=device).contiguous()

    self.models.run_faceparser(img, outpred)

    # Per-pixel class id via argmax over the 19 class channels.
    outpred = torch.squeeze(outpred)
    outpred = torch.argmax(outpred, 0)

    # Mouth parse: negative amounts act on the inner mouth only (class 11),
    # positive amounts on mouth plus lips (11, 12, 13).
    if MouthAmount != 0:
        if MouthAmount < 0:
            mouth_idxs = torch.tensor([11], device=device)
            iters = int(-MouthAmount)
        else:
            mouth_idxs = torch.tensor([11, 12, 13], device=device)
            iters = int(MouthAmount)

        mouth_parse = torch.isin(outpred, mouth_idxs)
        mouth_parse = torch.clamp(~mouth_parse, 0, 1).type(torch.float32)
        mouth_parse = torch.reshape(mouth_parse, (1, 1, 512, 512))
        # Invert so the mouth region is 1, dilate it, then invert back so the
        # grown mouth region becomes 0 in the final mask.
        mouth_parse = torch.add(torch.neg(mouth_parse), 1)
        mouth_parse = self._dilate_mask(mouth_parse, iters)
        mouth_parse = torch.squeeze(mouth_parse)
        mouth_parse = torch.add(torch.neg(mouth_parse), 1)
        mouth_parse = torch.reshape(mouth_parse, (1, 512, 512))
    else:
        # Mouth parser disabled: pass-through mask.
        mouth_parse = torch.ones((1, 512, 512), dtype=torch.float32, device=device)

    # BG parse: 1 where the pixel belongs to the face, 0 for background classes
    # (background, neck, necklace, cloth, hair, hat).
    bg_idxs = torch.tensor([0, 14, 15, 16, 17, 18], device=device)
    bg_parse = torch.isin(outpred, bg_idxs)
    bg_parse = torch.clamp(~bg_parse, 0, 1).type(torch.float32)
    bg_parse = torch.reshape(bg_parse, (1, 1, 512, 512))

    if FaceAmount > 0:
        # Grow the face region.
        bg_parse = self._dilate_mask(bg_parse, int(FaceAmount))
        # NOTE: left 2-D (512, 512) here, as in the original; the final
        # multiply broadcasts against the (1, 512, 512) mouth mask.
        bg_parse = torch.squeeze(bg_parse)
    elif FaceAmount < 0:
        # Shrink the face region: invert, dilate the background, invert back.
        bg_parse = torch.add(torch.neg(bg_parse), 1)
        bg_parse = self._dilate_mask(bg_parse, int(-FaceAmount))
        bg_parse = torch.squeeze(bg_parse)
        bg_parse = torch.add(torch.neg(bg_parse), 1)
        bg_parse = torch.reshape(bg_parse, (1, 512, 512))
    else:
        # Background parser disabled: pass-through mask.
        bg_parse = torch.ones((1, 512, 512), dtype=torch.float32, device=device)

    return torch.mul(bg_parse, mouth_parse)
|
| 1063 |
+
|
| 1064 |
+
def apply_bg_face_parser(self, img, FaceParserAmount):
    """Run the face parser on a 512x512 crop and return a background-only mask.

    The mask is 1 over the face and 0 over background classes, grown (positive
    `FaceParserAmount`) or shrunk (negative) by that many pixels. An amount of 0
    disables background parsing so the mouth parser can be used on its own.

    Parser classes:
    atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
    """
    # Default pass-through mask, returned when parsing is disabled.
    # (Device literal unified with the module-global `device` used everywhere
    # else in this method.)
    # NOTE(review): the disabled path returns a 2-D (512, 512) mask while the
    # active path returns (1, 512, 512); downstream broadcasting appears to
    # absorb the difference -- confirm before changing either shape.
    outpred = torch.ones((512, 512), dtype=torch.float32, device=device).contiguous()

    # turn mouth parser off at 0 so someone can just use the mouth parser
    if FaceParserAmount != 0:
        # Normalize with ImageNet statistics and add a batch dimension.
        img = torch.div(img, 255)
        img = v2.functional.normalize(img, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        img = torch.reshape(img, (1, 3, 512, 512))
        outpred = torch.empty((1, 19, 512, 512), dtype=torch.float32, device=device).contiguous()

        self.models.run_faceparser(img, outpred)

        # Per-pixel class id via argmax over the 19 class channels.
        outpred = torch.squeeze(outpred)
        outpred = torch.argmax(outpred, 0)

        # 1 where the pixel belongs to the face, 0 for background classes
        # (background, neck, necklace, cloth, hair, hat).
        bg_classes = torch.tensor([0, 14, 15, 16, 17, 18], device=device)
        outpred = torch.isin(outpred, bg_classes)
        outpred = torch.clamp(~outpred, 0, 1).type(torch.float32)
        outpred = torch.reshape(outpred, (1, 1, 512, 512))

        if FaceParserAmount > 0:
            # Dilate: grow the face region one pixel per pass.
            kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32, device=device)
            for _ in range(int(FaceParserAmount)):
                outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
                outpred = torch.clamp(outpred, 0, 1)
            outpred = torch.squeeze(outpred)

        if FaceParserAmount < 0:
            # Erode: invert, dilate the background, invert back.
            outpred = torch.neg(outpred)
            outpred = torch.add(outpred, 1)
            kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32, device=device)
            for _ in range(int(-FaceParserAmount)):
                outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
                outpred = torch.clamp(outpred, 0, 1)
            outpred = torch.squeeze(outpred)
            outpred = torch.neg(outpred)
            outpred = torch.add(outpred, 1)

        outpred = torch.reshape(outpred, (1, 512, 512))

    return outpred
|
| 1114 |
+
|
| 1115 |
+
|
| 1116 |
+
|
| 1117 |
+
def apply_restorer(self, swapped_face_upscaled, parameters):
    """Run the selected face-restoration model over the swapped face and blend the result.

    `swapped_face_upscaled` is a 512x512 CxHxW face tensor (0-255). `parameters`
    supplies the restorer selection ('RestorerTypeTextSel': GFPGAN / CF / GPEN256 /
    GPEN512 / GPEN1024), an optional re-alignment mode ('RestorerDetTypeTextSel':
    'Blend' or 'Reference'), the detection score threshold, and the blend strength
    ('RestorerSlider', 0-100). Returns the restored face blended with the input at
    that strength (input is returned unchanged at slider 0, fully restored at 100).
    """
    temp = swapped_face_upscaled
    t512 = v2.Resize((512, 512), antialias=False)
    t256 = v2.Resize((256, 256), antialias=False)
    t1024 = v2.Resize((1024, 1024), antialias=False)

    # If using a separate detection mode, re-align the face to FFHQ keypoints first.
    if parameters['RestorerDetTypeTextSel'] == 'Blend' or parameters['RestorerDetTypeTextSel'] == 'Reference':
        if parameters['RestorerDetTypeTextSel'] == 'Blend':
            # Set up Transformation from the fixed ArcFace template, scaled from
            # a 112-wide layout to 512 (x4 plus a 32px x-offset).
            dst = self.arcface_dst * 4.0
            dst[:,0] += 32.0

        elif parameters['RestorerDetTypeTextSel'] == 'Reference':
            try:
                # Detect landmarks directly on the swapped face.
                dst = self.models.resnet50(swapped_face_upscaled, score=parameters['DetectScoreSlider']/100.0)
            except:
                # NOTE(review): bare except -- any detector failure silently
                # skips restoration and returns the input unchanged.
                return swapped_face_upscaled

        # Similarity transform mapping the detected/template points onto FFHQ alignment.
        tform = trans.SimilarityTransform()
        tform.estimate(dst, self.FFHQ_kps)

        # Transform, scale, and normalize (57.2958 converts radians to degrees).
        temp = v2.functional.affine(swapped_face_upscaled, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
        temp = v2.functional.crop(temp, 0,0, 512, 512)

    # Scale to [0, 1] then normalize to [-1, 1], the range the restorers expect.
    temp = torch.div(temp, 255)
    temp = v2.functional.normalize(temp, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=False)
    if parameters['RestorerTypeTextSel'] == 'GPEN256':
        temp = t256(temp)
    temp = torch.unsqueeze(temp, 0).contiguous()

    # Bindings: pre-allocated output buffer the selected model writes into.
    # GPEN256/GPEN1024 re-allocate below to match their native resolutions.
    outpred = torch.empty((1,3,512,512), dtype=torch.float32, device=device).contiguous()

    if parameters['RestorerTypeTextSel'] == 'GFPGAN':
        self.models.run_GFPGAN(temp, outpred)

    elif parameters['RestorerTypeTextSel'] == 'CF':
        self.models.run_codeformer(temp, outpred)

    elif parameters['RestorerTypeTextSel'] == 'GPEN256':
        outpred = torch.empty((1,3,256,256), dtype=torch.float32, device=device).contiguous()
        self.models.run_GPEN_256(temp, outpred)

    elif parameters['RestorerTypeTextSel'] == 'GPEN512':
        self.models.run_GPEN_512(temp, outpred)

    elif parameters['RestorerTypeTextSel'] == 'GPEN1024':
        temp = t1024(temp)
        outpred = torch.empty((1, 3, 1024, 1024), dtype=torch.float32, device=device).contiguous()
        self.models.run_GPEN_1024(temp, outpred)

    # Format back to cxHxW @ 255 (model output is in [-1, 1]).
    outpred = torch.squeeze(outpred)
    outpred = torch.clamp(outpred, -1, 1)
    outpred = torch.add(outpred, 1)
    outpred = torch.div(outpred, 2)
    outpred = torch.mul(outpred, 255)
    # Bring non-512 model outputs back to the working 512x512 resolution.
    if parameters['RestorerTypeTextSel'] == 'GPEN256':
        outpred = t512(outpred)
    elif parameters['RestorerTypeTextSel'] == 'GPEN1024':
        outpred = t512(outpred)
    # Invert Transform: undo the FFHQ re-alignment applied above (tform is
    # guaranteed to exist here because this repeats the earlier condition).
    if parameters['RestorerDetTypeTextSel'] == 'Blend' or parameters['RestorerDetTypeTextSel'] == 'Reference':
        outpred = v2.functional.affine(outpred, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )

    # Blend restored output with the original swap by slider strength.
    alpha = float(parameters["RestorerSlider"])/100.0
    outpred = torch.add(torch.mul(outpred, alpha), torch.mul(swapped_face_upscaled, 1-alpha))

    return outpred
|
| 1189 |
+
|
| 1190 |
+
def apply_fake_diff(self, swapped_face, original_face, DiffAmount):
    """Build a binary (1, H, W) mask marking pixels where the swap differs from the original.

    A pixel is marked when any channel's absolute difference reaches the
    threshold derived from `DiffAmount` (a 0-100 slider scaled to 0-255).
    """
    # Work in HxWxC so the channel reduction is over the last axis.
    swapped_hwc = swapped_face.permute(1, 2, 0)
    original_hwc = original_face.permute(1, 2, 0)

    # Absolute per-channel difference between the swap and original.
    diff = torch.abs(swapped_hwc - original_hwc)

    # Slider value scaled into pixel units (0-100 -> 0-255).
    fthresh = DiffAmount * 2.55

    # Binarize each channel against the threshold.
    diff[diff < fthresh] = 0
    diff[diff >= fthresh] = 1

    # A pixel counts if ANY channel exceeded the threshold.
    diff = torch.unsqueeze(torch.sum(diff, dim=2), 2)
    diff[diff > 0] = 1

    # Back to CxHxW with a single channel.
    return diff.permute(2, 0, 1)
|
| 1212 |
+
|
| 1213 |
+
|
| 1214 |
+
|
| 1215 |
+
def clear_mem(self):
    """Drop references to every loaded model so their memory can be reclaimed,
    leaving each attribute reset to an empty list placeholder."""
    model_attrs = (
        'swapper_model',
        'GFPGAN_model',
        'occluder_model',
        'face_parsing_model',
        'codeformer_model',
        'GPEN_256_model',
        'GPEN_512_model',
        'GPEN_1024_model',
        'resnet_model',
        'detection_model',
        'recognition_model',
    )
    # Delete each model reference, then re-bind the attribute to an empty
    # placeholder (same del-then-reassign sequence as before).
    for attr in model_attrs:
        delattr(self, attr)
        setattr(self, attr, [])
|
| 1239 |
+
|
| 1240 |
+
# test = swap.permute(1, 2, 0)
|
| 1241 |
+
# test = test.cpu().numpy()
|
| 1242 |
+
# cv2.imwrite('2.jpg', test)
|
rope/media/tl_beg_off.png
ADDED
|
rope/media/tl_beg_on.png
ADDED
|
rope/media/tl_left_hover.png
ADDED
|
rope/media/tl_left_off.png
ADDED
|
rope/media/tl_left_on.png
ADDED
|
rope/media/tl_right_hover.png
ADDED
|
rope/media/tl_right_off.png
ADDED
|
rope/media/tl_right_on.png
ADDED
|