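# Page header captured with the file (not part of the module):
# uploader: acmyu (acmyu's picture) | commit 968474b: "output pose coords as list"
# page links: raw, history, blame | file size: 3.02 kB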
from typing import Callable, Dict, Optional, Tuple, Union

import cv2
import numpy as np
import PIL
import PIL.Image
import torch
from huggingface_hub import hf_hub_download

from .body_estimation import Wholebody, resize_image
from .draw import draw_openpose
class DWposeDetector:
    """Whole-body pose detector backed by the DWPose ONNX checkpoints.

    Instances are callable: given an image they run ``Wholebody`` on a
    resized copy, repackage the raw keypoints into a pose dictionary and,
    optionally, render that pose with a drawing callback.
    """

    # Number of leading keypoints per candidate that are treated as "body".
    _NUM_BODY_KEYPOINTS = 18
    # Body keypoints scoring at or below this value are marked as missing (-1).
    _BODY_SCORE_THRESHOLD = 0.3

    def __init__(self, device: str = "cpu"):
        """Download the checkpoints and build the underlying estimator.

        Args:
            device: Device name forwarded unchanged to ``Wholebody``. The
                default is the plain-ASCII string ``"cpu"``.
        """
        # hf_hub_download returns the local path of the fetched file; use it
        # directly instead of re-typing the path relative to the working dir.
        model_det = hf_hub_download("RedHash/DWPose", "yolox_l.onnx", local_dir="./checkpoints")
        model_pose = hf_hub_download("RedHash/DWPose", "dw-ll_ucoco_384.onnx", local_dir="./checkpoints")
        self.pose_estimation = Wholebody(device=device, model_det=model_det, model_pose=model_pose)

    def _format_pose(self, candidates, scores, width, height):
        """Repackage raw keypoints and scores into a pose dictionary.

        The inputs are left untouched; all normalisation happens on copies.

        Args:
            candidates: 3-D array unpacked as
                ``(num_candidates, num_keypoints, locs)``. Index 0 of the last
                axis is divided by ``width`` and index 1 by ``height``
                (presumably x and y pixel coordinates -- confirm with
                ``Wholebody``).
            scores: 2-D array of per-keypoint scores, indexed like the first
                two axes of ``candidates``.
            width: Divisor applied to index 0 of the last axis.
            height: Divisor applied to index 1 of the last axis.

        Returns:
            A dict with keys ``bodies`` (first 18 keypoints of every
            candidate, flattened to ``(num_candidates * 18, locs)``),
            ``bodies_multi`` (same data, per candidate), ``body_scores``
            (``18 * candidate + keypoint`` where the score exceeds 0.3,
            otherwise ``-1``; same dtype as ``scores``), ``hands`` /
            ``hands_scores`` (slices ``92:113`` and ``113:`` stacked along
            the first axis), ``faces`` / ``faces_scores`` (slice ``24:92``)
            and ``num_candidates``.
        """
        num_candidates, _, locs = candidates.shape
        n_body = self._NUM_BODY_KEYPOINTS

        # Copy first so the in-place division does not rescale the caller's array.
        candidates = candidates.copy()
        candidates[..., 0] /= float(width)
        candidates[..., 1] /= float(height)

        bodies = candidates[:, :n_body].copy()
        bodies_flat = bodies.reshape(num_candidates * n_body, locs)

        # Replace each confident score with the row index of that keypoint in
        # ``bodies_flat``; everything else becomes -1. np.where builds a new
        # array, so ``scores`` itself is not modified.
        raw_body_scores = scores[:, :n_body]
        rows, cols = raw_body_scores.shape
        flat_index = n_body * np.arange(rows)[:, None] + np.arange(cols)
        body_scores = np.where(
            raw_body_scores > self._BODY_SCORE_THRESHOLD, flat_index, -1
        ).astype(raw_body_scores.dtype)

        faces = candidates[:, 24:92]
        faces_scores = scores[:, 24:92]

        # The two hand slices are stacked, so each candidate contributes two rows.
        hands = np.vstack([candidates[:, 92:113], candidates[:, 113:]])
        hands_scores = np.vstack([scores[:, 92:113], scores[:, 113:]])

        return dict(
            bodies=bodies_flat,
            bodies_multi=bodies,
            body_scores=body_scores,
            hands=hands,
            hands_scores=hands_scores,
            faces=faces,
            faces_scores=faces_scores,
            num_candidates=num_candidates,
        )

    @torch.inference_mode()
    def __call__(
        self,
        image: Union[PIL.Image.Image, np.ndarray],
        detect_resolution: int = 512,
        draw_pose: Optional[Callable] = draw_openpose,
        output_type: str = "pil",
        **kwargs,
    ) -> Union[Dict, Tuple[Union[PIL.Image.Image, np.ndarray], Dict]]:
        """Detect poses in ``image`` and optionally render them.

        Args:
            image: Input picture. Anything that is not a NumPy array is
                converted through ``image.convert("RGB")``.
            detect_resolution: Passed to ``resize_image`` as
                ``target_resolution`` before detection.
            draw_pose: Callback invoked as
                ``draw_pose(pose, height=..., width=..., **kwargs)``. Pass a
                falsy value (e.g. ``None``) to skip drawing.
            output_type: ``"pil"`` or ``"np"``; selects the type of the
                rendered image. Ignored when drawing is skipped.
            **kwargs: Forwarded to ``draw_pose``.

        Returns:
            The pose dict alone when ``draw_pose`` is falsy; otherwise a
            ``(pose_image, pose)`` tuple where ``pose_image`` has been resized
            back to the input image's width and height.

        Raises:
            ValueError: If drawing is requested and ``output_type`` is
                neither ``"pil"`` nor ``"np"``.
        """
        # Fail fast: reject a bad output_type before running detection.
        if draw_pose and output_type not in ("pil", "np"):
            raise ValueError("output_type should be 'pil' or 'np'")

        if isinstance(image, np.ndarray):
            # Work on a private copy of the caller's array.
            image = image.copy()
        else:
            image = np.array(image.convert("RGB"))

        original_height, original_width, _ = image.shape

        image = resize_image(image, target_resolution=detect_resolution)
        height, width, _ = image.shape

        candidates, scores = self.pose_estimation(image)
        pose = self._format_pose(candidates, scores, width, height)

        if not draw_pose:
            return pose

        pose_image = draw_pose(pose, height=height, width=width, **kwargs)
        # The third positional parameter of cv2.resize is ``dst``; the
        # interpolation flag has to be passed by keyword to take effect.
        pose_image = cv2.resize(
            pose_image,
            (original_width, original_height),
            interpolation=cv2.INTER_LANCZOS4,
        )

        if output_type == "pil":
            pose_image = PIL.Image.fromarray(pose_image)

        return pose_image, pose