Spaces:
Paused
Paused
| from typing import Callable, Dict, Optional, Union | |
| import cv2 | |
| import numpy as np | |
| import PIL | |
| import PIL.Image | |
| import torch | |
| from huggingface_hub import hf_hub_download | |
| from .body_estimation import Wholebody, resize_image | |
| from .draw import draw_openpose | |
class DWposeDetector:
    """Whole-body pose detector built on the DWPose ONNX models.

    On construction this downloads the YOLOX person detector and the
    DWPose whole-body pose checkpoint from the Hugging Face Hub into
    ``./checkpoints`` and wraps them in a :class:`Wholebody` estimator.
    Calling the instance runs detection on an image and (optionally)
    renders an OpenPose-style visualization.
    """

    def __init__(self, device: str = "cpu"):
        """Download model checkpoints and build the pose estimator.

        Args:
            device: inference device passed through to ``Wholebody``
                (e.g. ``"cpu"`` or ``"cuda"``).

        FIX: the original default was ``"сpu"`` with a Cyrillic "с"
        (U+0441), which looks identical to "cpu" but fails any string
        comparison against the ASCII literal.
        """
        hf_hub_download("RedHash/DWPose", "yolox_l.onnx", local_dir="./checkpoints")
        hf_hub_download("RedHash/DWPose", "dw-ll_ucoco_384.onnx", local_dir="./checkpoints")
        self.pose_estimation = Wholebody(
            device=device, model_det="checkpoints/yolox_l.onnx", model_pose="checkpoints/dw-ll_ucoco_384.onnx"
        )

    def _format_pose(self, candidates, scores, width, height):
        """Normalize keypoints to [0, 1] and split them into body/face/hand groups.

        Args:
            candidates: float array of shape ``(num_candidates, K, 2)`` holding
                absolute pixel coordinates (COCO WholeBody layout is assumed:
                18 body, face at 24:92, hands at 92:113 and 113: — TODO confirm
                against ``Wholebody``'s output ordering). Normalized IN PLACE.
            scores: float array of shape ``(num_candidates, K)``; the body
                slice is overwritten IN PLACE (see below).
            width: image width in pixels used to normalize x.
            height: image height in pixels used to normalize y.

        Returns:
            Dict with normalized keypoints. ``bodies`` is the body keypoints
            flattened to ``(num_candidates * 18, 2)``; ``body_scores`` holds
            the flat keypoint index for confident points (score > 0.3) and
            ``-1`` otherwise, which is the indexing scheme the drawing code
            uses to look points up in the flattened array.
        """
        num_candidates, _, locs = candidates.shape
        # Normalize in place: x by width, y by height.
        candidates[..., 0] /= float(width)
        candidates[..., 1] /= float(height)

        bodies = candidates[:, :18].copy()
        bodies_flat = bodies.reshape(num_candidates * 18, locs)

        # Replace confident body scores with their flat keypoint index and
        # low-confidence ones with -1.  Vectorized form of the original
        # nested loops; writes through the slice so ``scores`` is mutated
        # exactly as before.
        body_scores = scores[:, :18]
        flat_index = np.arange(num_candidates * 18, dtype=body_scores.dtype).reshape(num_candidates, 18)
        body_scores[...] = np.where(body_scores > 0.3, flat_index, -1)

        faces = candidates[:, 24:92]
        faces_scores = scores[:, 24:92]
        # Left and right hands (21 keypoints each) stacked along axis 0.
        hands = np.vstack([candidates[:, 92:113], candidates[:, 113:]])
        hands_scores = np.vstack([scores[:, 92:113], scores[:, 113:]])

        return dict(
            bodies=bodies_flat,
            bodies_multi=bodies,
            body_scores=body_scores,
            hands=hands,
            hands_scores=hands_scores,
            faces=faces,
            faces_scores=faces_scores,
            num_candidates=num_candidates,
        )

    def __call__(
        self,
        image: Union[PIL.Image.Image, np.ndarray],
        detect_resolution: int = 512,
        draw_pose: Optional[Callable] = draw_openpose,
        output_type: str = "pil",
        **kwargs,
    ) -> Union[PIL.Image.Image, np.ndarray, Dict]:
        """Run whole-body pose estimation on ``image``.

        Args:
            image: input RGB image as a PIL image or an HxWx3 ndarray.
            detect_resolution: target resolution for the internal resize
                before detection.
            draw_pose: renderer called as ``draw_pose(pose, height=..,
                width=.., **kwargs)``; pass a falsy value to skip drawing.
            output_type: ``"pil"`` or ``"np"`` for the rendered image.
            **kwargs: forwarded to ``draw_pose``.

        Returns:
            The pose dict alone when ``draw_pose`` is falsy, otherwise a
            ``(pose_image, pose)`` tuple with the rendering resized back to
            the original image dimensions.

        Raises:
            ValueError: if ``output_type`` is neither ``"pil"`` nor ``"np"``.
        """
        # isinstance instead of the original type(...) != np.ndarray check;
        # also admits ndarray subclasses.
        if not isinstance(image, np.ndarray):
            image = np.array(image.convert("RGB"))
        image = image.copy()
        original_height, original_width, _ = image.shape
        image = resize_image(image, target_resolution=detect_resolution)
        height, width, _ = image.shape

        candidates, scores = self.pose_estimation(image)
        pose = self._format_pose(candidates, scores, width, height)
        if not draw_pose:
            return pose

        pose_image = draw_pose(pose, height=height, width=width, **kwargs)
        # FIX: cv2.resize's third positional argument is ``dst``, not the
        # interpolation flag — pass it as a keyword so Lanczos resampling
        # is actually used when scaling back to the original size.
        pose_image = cv2.resize(
            pose_image, (original_width, original_height), interpolation=cv2.INTER_LANCZOS4
        )
        if output_type == "pil":
            pose_image = PIL.Image.fromarray(pose_image)
        elif output_type != "np":
            raise ValueError("output_type should be 'pil' or 'np'")
        return pose_image, pose