yamildiego committed on
Commit
5255f7f
·
1 Parent(s): 2391a94

added depth

Browse files
Files changed (1)
  1. handler.py +36 -38
handler.py CHANGED
@@ -45,19 +45,19 @@ class EndpointHandler():
45
  face_adapter = f"./checkpoints/ip-adapter.bin"
46
  controlnet_path = f"./checkpoints/ControlNetModel"
47
 
48
- # transform = Compose([
49
- # Resize(
50
- # width=518,
51
- # height=518,
52
- # resize_target=False,
53
- # keep_aspect_ratio=True,
54
- # ensure_multiple_of=14,
55
- # resize_method='lower_bound',
56
- # image_interpolation_method=cv2.INTER_CUBIC,
57
- # ),
58
- # NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
59
- # PrepareForNet(),
60
- # ])
61
 
62
  self.controlnet_identitynet = ControlNetModel.from_pretrained(
63
  controlnet_path, torch_dtype=dtype
@@ -92,7 +92,7 @@ class EndpointHandler():
92
 
93
  controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
94
  controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
95
- # controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
96
 
97
  controlnet_pose = ControlNetModel.from_pretrained(
98
  controlnet_pose_model, torch_dtype=dtype
@@ -100,49 +100,49 @@ class EndpointHandler():
100
  controlnet_canny = ControlNetModel.from_pretrained(
101
  controlnet_canny_model, torch_dtype=dtype
102
  ).to(device)
103
- # controlnet_depth = ControlNetModel.from_pretrained(
104
- # controlnet_depth_model, torch_dtype=dtype
105
- # ).to(device)
106
 
107
  openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
108
- # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
109
 
110
  def get_canny_image(image, t1=100, t2=200):
111
  image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
112
  edges = cv2.Canny(image, t1, t2)
113
  return Image.fromarray(edges, "L")
114
 
115
- # def get_depth_map(image):
116
 
117
- # image = np.array(image) / 255.0
118
 
119
- # h, w = image.shape[:2]
120
 
121
- # image = transform({'image': image})['image']
122
- # image = torch.from_numpy(image).unsqueeze(0).to("cuda")
123
 
124
- # with torch.no_grad():
125
- # depth = depth_anything(image)
126
 
127
- # depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
128
- # depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
129
 
130
- # depth = depth.cpu().numpy().astype(np.uint8)
131
 
132
- # depth_image = Image.fromarray(depth)
133
 
134
- # return depth_image
135
 
136
  self.controlnet_map = {
137
  "pose": controlnet_pose,
138
  "canny": controlnet_canny,
139
- # "depth": controlnet_depth,
140
  }
141
 
142
  self.controlnet_map_fn = {
143
  "pose": openpose,
144
  "canny": get_canny_image,
145
- # "depth": get_depth_map,
146
  }
147
 
148
  self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
@@ -153,14 +153,15 @@ class EndpointHandler():
153
  identitynet_strength_ratio = 0.8
154
  pose_strength = 0.4
155
  canny_strength = 0.3
156
- self.my_controlnet_selection = ["pose", "canny"]
157
-
158
 
159
  controlnet_scales = {
160
  "pose": pose_strength,
161
  "canny": canny_strength,
162
- # "depth": depth_strength,
163
  }
 
164
  self.pipe.controlnet = MultiControlNetModel(
165
  [self.controlnet_identitynet]
166
  + [self.controlnet_map[s] for s in self.my_controlnet_selection]
@@ -171,7 +172,6 @@ class EndpointHandler():
171
 
172
  def __call__(self, data):
173
 
174
-
175
  default_prompt = "watercolor painting, {prompt}. vibrant, beautiful, painterly, detailed, textural, artistic"
176
  default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"
177
 
@@ -185,8 +185,6 @@ class EndpointHandler():
185
  pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/9bP9tMb/pose-2-1.jpg")
186
 
187
  adapter_strength_ratio = 0.8
188
- # depth_strength = 0.5
189
- # controlnet_selection = ["pose", "canny", "depth"]
190
 
191
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
192
  return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 
45
  face_adapter = f"./checkpoints/ip-adapter.bin"
46
  controlnet_path = f"./checkpoints/ControlNetModel"
47
 
48
+ transform = Compose([
49
+ Resize(
50
+ width=518,
51
+ height=518,
52
+ resize_target=False,
53
+ keep_aspect_ratio=True,
54
+ ensure_multiple_of=14,
55
+ resize_method='lower_bound',
56
+ image_interpolation_method=cv2.INTER_CUBIC,
57
+ ),
58
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
59
+ PrepareForNet(),
60
+ ])
61
 
62
  self.controlnet_identitynet = ControlNetModel.from_pretrained(
63
  controlnet_path, torch_dtype=dtype
 
92
 
93
  controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
94
  controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
95
+ controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
96
 
97
  controlnet_pose = ControlNetModel.from_pretrained(
98
  controlnet_pose_model, torch_dtype=dtype
 
100
  controlnet_canny = ControlNetModel.from_pretrained(
101
  controlnet_canny_model, torch_dtype=dtype
102
  ).to(device)
103
+ controlnet_depth = ControlNetModel.from_pretrained(
104
+ controlnet_depth_model, torch_dtype=dtype
105
+ ).to(device)
106
 
107
  openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
108
+ depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
109
 
110
  def get_canny_image(image, t1=100, t2=200):
111
  image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
112
  edges = cv2.Canny(image, t1, t2)
113
  return Image.fromarray(edges, "L")
114
 
115
+ def get_depth_map(image):
116
 
117
+ image = np.array(image) / 255.0
118
 
119
+ h, w = image.shape[:2]
120
 
121
+ image = transform({'image': image})['image']
122
+ image = torch.from_numpy(image).unsqueeze(0).to("cuda")
123
 
124
+ with torch.no_grad():
125
+ depth = depth_anything(image)
126
 
127
+ depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
128
+ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
129
 
130
+ depth = depth.cpu().numpy().astype(np.uint8)
131
 
132
+ depth_image = Image.fromarray(depth)
133
 
134
+ return depth_image
135
 
136
  self.controlnet_map = {
137
  "pose": controlnet_pose,
138
  "canny": controlnet_canny,
139
+ "depth": controlnet_depth,
140
  }
141
 
142
  self.controlnet_map_fn = {
143
  "pose": openpose,
144
  "canny": get_canny_image,
145
+ "depth": get_depth_map,
146
  }
147
 
148
  self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
 
153
  identitynet_strength_ratio = 0.8
154
  pose_strength = 0.4
155
  canny_strength = 0.3
156
+ depth_strength = 0.5
157
+ self.my_controlnet_selection = ["pose", "canny", "depth"]
158
 
159
  controlnet_scales = {
160
  "pose": pose_strength,
161
  "canny": canny_strength,
162
+ "depth": depth_strength,
163
  }
164
+
165
  self.pipe.controlnet = MultiControlNetModel(
166
  [self.controlnet_identitynet]
167
  + [self.controlnet_map[s] for s in self.my_controlnet_selection]
 
172
 
173
  def __call__(self, data):
174
 
 
175
  default_prompt = "watercolor painting, {prompt}. vibrant, beautiful, painterly, detailed, textural, artistic"
176
  default_negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, anime, photorealistic, 35mm film, deformed, glitch, low contrast, noisy"
177
 
 
185
  pose_image_path = data.pop("pose_image_path", "https://i.ibb.co/9bP9tMb/pose-2-1.jpg")
186
 
187
  adapter_strength_ratio = 0.8
 
 
188
 
189
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
190
  return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))