Spaces:
Paused
Paused
| from typing import Callable, Dict, Optional, Union | |
| import cv2 | |
| import numpy as np | |
| import PIL | |
| import PIL.Image | |
| import torch | |
| from huggingface_hub import hf_hub_download | |
| from .body_estimation import Wholebody, resize_image | |
| from .draw import draw_openpose | |
class DWposeDetector:
    """Whole-body pose detector built on the DWPose ONNX models.

    On construction this downloads the YOLOX person detector and the
    DWPose whole-body pose checkpoint from the Hugging Face Hub into
    ``./checkpoints`` and wraps them in a :class:`Wholebody` estimator.
    Calling the instance runs detection on an image and (optionally)
    renders an OpenPose-style visualization.
    """

    def __init__(self, device: str = "cpu"):
        """Download model checkpoints and build the pose estimator.

        Args:
            device: inference device passed through to ``Wholebody``
                (e.g. ``"cpu"`` or ``"cuda"``).

        FIX: the original default was ``"сpu"`` with a Cyrillic "с"
        (U+0441), which looks identical to "cpu" but fails any string
        comparison against the ASCII literal.
        """
        hf_hub_download("RedHash/DWPose", "yolox_l.onnx", local_dir="./checkpoints")
        hf_hub_download("RedHash/DWPose", "dw-ll_ucoco_384.onnx", local_dir="./checkpoints")
        self.pose_estimation = Wholebody(
            device=device, model_det="checkpoints/yolox_l.onnx", model_pose="checkpoints/dw-ll_ucoco_384.onnx"
        )

    def _format_pose(self, candidates, scores, width, height):
        """Normalize keypoints to [0, 1] and split them into body/face/hand groups.

        Args:
            candidates: float array of shape ``(num_candidates, K, 2)`` holding
                absolute pixel coordinates (COCO WholeBody layout is assumed:
                18 body, face at 24:92, hands at 92:113 and 113: — TODO confirm
                against ``Wholebody``'s output ordering). Normalized IN PLACE.
            scores: float array of shape ``(num_candidates, K)``; the body
                slice is overwritten IN PLACE (see below).
            width: image width in pixels used to normalize x.
            height: image height in pixels used to normalize y.

        Returns:
            Dict with normalized keypoints. ``bodies`` is the body keypoints
            flattened to ``(num_candidates * 18, 2)``; ``body_scores`` holds
            the flat keypoint index for confident points (score > 0.3) and
            ``-1`` otherwise, which is the indexing scheme the drawing code
            uses to look points up in the flattened array.
        """
        num_candidates, _, locs = candidates.shape
        # Normalize in place: x by width, y by height.
        candidates[..., 0] /= float(width)
        candidates[..., 1] /= float(height)

        bodies = candidates[:, :18].copy()
        bodies_flat = bodies.reshape(num_candidates * 18, locs)

        # Replace confident body scores with their flat keypoint index and
        # low-confidence ones with -1.  Vectorized form of the original
        # nested loops; writes through the slice so ``scores`` is mutated
        # exactly as before.
        body_scores = scores[:, :18]
        flat_index = np.arange(num_candidates * 18, dtype=body_scores.dtype).reshape(num_candidates, 18)
        body_scores[...] = np.where(body_scores > 0.3, flat_index, -1)

        faces = candidates[:, 24:92]
        faces_scores = scores[:, 24:92]
        # Left and right hands (21 keypoints each) stacked along axis 0.
        hands = np.vstack([candidates[:, 92:113], candidates[:, 113:]])
        hands_scores = np.vstack([scores[:, 92:113], scores[:, 113:]])

        return dict(
            bodies=bodies_flat,
            bodies_multi=bodies,
            body_scores=body_scores,
            hands=hands,
            hands_scores=hands_scores,
            faces=faces,
            faces_scores=faces_scores,
            num_candidates=num_candidates,
        )

    def __call__(
        self,
        image: Union[PIL.Image.Image, np.ndarray],
        detect_resolution: int = 512,
        draw_pose: Optional[Callable] = draw_openpose,
        output_type: str = "pil",
        **kwargs,
    ) -> Union[PIL.Image.Image, np.ndarray, Dict]:
        """Run whole-body pose estimation on ``image``.

        Args:
            image: input RGB image as a PIL image or an HxWx3 ndarray.
            detect_resolution: target resolution for the internal resize
                before detection.
            draw_pose: renderer called as ``draw_pose(pose, height=..,
                width=.., **kwargs)``; pass a falsy value to skip drawing.
            output_type: ``"pil"`` or ``"np"`` for the rendered image.
            **kwargs: forwarded to ``draw_pose``.

        Returns:
            The pose dict alone when ``draw_pose`` is falsy, otherwise a
            ``(pose_image, pose)`` tuple with the rendering resized back to
            the original image dimensions.

        Raises:
            ValueError: if ``output_type`` is neither ``"pil"`` nor ``"np"``.
        """
        # isinstance instead of the original type(...) != np.ndarray check;
        # also admits ndarray subclasses.
        if not isinstance(image, np.ndarray):
            image = np.array(image.convert("RGB"))
        image = image.copy()
        original_height, original_width, _ = image.shape
        image = resize_image(image, target_resolution=detect_resolution)
        height, width, _ = image.shape

        candidates, scores = self.pose_estimation(image)
        pose = self._format_pose(candidates, scores, width, height)
        if not draw_pose:
            return pose

        pose_image = draw_pose(pose, height=height, width=width, **kwargs)
        # FIX: cv2.resize's third positional argument is ``dst``, not the
        # interpolation flag — pass it as a keyword so Lanczos resampling
        # is actually used when scaling back to the original size.
        pose_image = cv2.resize(
            pose_image, (original_width, original_height), interpolation=cv2.INTER_LANCZOS4
        )
        if output_type == "pil":
            pose_image = PIL.Image.fromarray(pose_image)
        elif output_type != "np":
            raise ValueError("output_type should be 'pil' or 'np'")
        return pose_image, pose