Spaces:
Sleeping
Sleeping
Commit
·
93277a5
1
Parent(s):
b8e0861
scripts for heart and star vertex annotator
Browse files- src/utils.py +45 -0
- src/vit_pose/main_vit_pose.py +81 -0
- src/vit_pose/vertex_annotator_heart.py +53 -0
- src/vit_pose/vertex_annotator_star.py +54 -0
src/utils.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from moviepy import ImageSequenceClip
|
| 4 |
+
from natsort import natsorted
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def create_video_from_images(folder_path, output_video_file, fps):
    """
    Creates a video file from a sequence of images in a folder.

    Args:
        folder_path (str): The path to the folder containing the images.
        output_video_file (str): The name of the output video file (e.g., 'my_video.mp4').
        fps (int): The frames per second for the output video.
    """
    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return

    # Collect supported image files; natsorted keeps numeric names in
    # human order (image-2.png before image-10.png).
    supported_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    image_files = []
    for file_name in natsorted(os.listdir(folder_path)):
        if file_name.lower().endswith(supported_extensions):
            image_files.append(os.path.join(folder_path, file_name))

    if not image_files:
        print(f"Error: No supported image files found in '{folder_path}'.")
        return

    print(f"Found {len(image_files)} images. Creating video...")

    try:
        # Assemble the frames into a clip and render it to disk.
        clip = ImageSequenceClip(image_files, fps=fps)
        clip.write_videofile(output_video_file, fps=fps)
        print(f"Successfully created video: '{output_video_file}'")
    except Exception as e:
        print(f"An error occurred while creating the video: {e}")
|
src/vit_pose/main_vit_pose.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import requests
|
| 3 |
+
import numpy as np
|
| 4 |
+
import supervision as sv
|
| 5 |
+
import cv2
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
import imageio.v2 as imageio
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation, infer_device
|
| 11 |
+
from vit_pose.vertex_annotator_heart import VertexAnnotatorHeart
|
| 12 |
+
from vit_pose.vertex_annotator_star import VertexAnnotatorStar
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Pick the best available inference device (CUDA/MPS/CPU) once for both models.
device = infer_device()

# Stage 1 — person detection: RT-DETR checkpoint trained on COCO + Objects365.
# NOTE(review): these from_pretrained calls download/load weights at import
# time, so importing this module has network/disk side effects.
person_image_processor = AutoProcessor.from_pretrained("PekingU/rtdetr_r50vd_coco_o365")
person_model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd_coco_o365", device_map=device)

# Stage 2 — keypoint estimation: ViTPose run on each detected person box.
image_processor = AutoProcessor.from_pretrained("usyd-community/vitpose-base-simple")
model = VitPoseForPoseEstimation.from_pretrained("usyd-community/vitpose-base-simple", device_map=device)
|
| 24 |
+
|
| 25 |
+
def _detect_person_boxes(image):
    """Run the RT-DETR detector and return person boxes in COCO (x, y, w, h) format."""
    inputs = person_image_processor(images=image, return_tensors="pt").to(person_model.device)

    with torch.no_grad():
        outputs = person_model(**inputs)

    results = person_image_processor.post_process_object_detection(
        outputs, target_sizes=torch.tensor([(image.height, image.width)]), threshold=0.3
    )
    result = results[0]

    # Human label refers 0 index in COCO dataset
    person_boxes = result["boxes"][result["labels"] == 0].cpu().numpy()

    # Convert boxes from VOC (x1, y1, x2, y2) to COCO (x1, y1, w, h) format
    person_boxes[:, 2] = person_boxes[:, 2] - person_boxes[:, 0]
    person_boxes[:, 3] = person_boxes[:, 3] - person_boxes[:, 1]
    return person_boxes


def _estimate_keypoints(image, person_boxes):
    """Run ViTPose on each person box and return the results as sv.KeyPoints."""
    inputs = image_processor(image, boxes=[person_boxes], return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    pose_results = image_processor.post_process_pose_estimation(outputs, boxes=[person_boxes])
    image_pose_result = pose_results[0]

    xy = torch.stack([pose_result['keypoints'] for pose_result in image_pose_result]).cpu().numpy()
    scores = torch.stack([pose_result['scores'] for pose_result in image_pose_result]).cpu().numpy()

    return sv.KeyPoints(xy=xy, confidence=scores)


def _annotate_pose(image, key_points):
    """Draw skeleton edges and heart-shaped vertices onto a copy of *image*."""
    edge_annotator = sv.EdgeAnnotator(
        color=sv.Color.from_hex("#e1e1e1"),
        thickness=1
    )
    vertex_annotator = VertexAnnotatorHeart(
        color=sv.Color.from_hex('#ffc0cb'),
        radius=10
    )
    annotated_frame = edge_annotator.annotate(
        scene=image.copy(),
        key_points=key_points
    )
    annotated_frame = vertex_annotator.annotate(
        scene=annotated_frame,
        key_points=key_points
    )
    return annotated_frame


def vit_pose_estimation(image, frame_count: int):
    """Detect people in *image*, estimate their pose, and return an annotated frame.

    Args:
        image: Input frame (PIL-style image exposing .height/.width/.copy()).
        frame_count: Frame index; kept for API compatibility, currently unused.

    Returns:
        The annotated frame, or an unannotated copy of *image* when no person
        is detected.
    """
    person_boxes = _detect_person_boxes(image)

    # With zero detections the pose processor and torch.stack([]) would fail;
    # return the frame untouched instead of crashing on empty frames.
    # NOTE(review): this returns a PIL copy while the annotated path returns
    # whatever supervision's annotators emit — confirm callers accept both.
    if len(person_boxes) == 0:
        return image.copy()

    key_points = _estimate_keypoints(image, person_boxes)
    return _annotate_pose(image, key_points)
|
| 81 |
+
|
src/vit_pose/vertex_annotator_heart.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import supervision as sv
|
| 4 |
+
from supervision.annotators.base import ImageType
|
| 5 |
+
from supervision.keypoint.core import KeyPoints
|
| 6 |
+
from supervision.utils.conversion import ensure_cv2_image_for_annotation
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class VertexAnnotatorHeart(sv.VertexAnnotator):
    """
    Extend the original class to draw heart-shaped vertices.

    Each keypoint is marked with two circles (the top lobes) and a triangle
    (the bottom tip) drawn in the annotator's color.
    """

    def __init__(self, color=sv.Color.RED, radius=5, thickness=-1):
        """
        Args:
            color: Marker color.
            radius: Half-width of the heart; it spans about 2 * radius
                horizontally and 2.5 * radius vertically.
            thickness: Drawing thickness; -1 (default) fills the shape.
                Added for interface consistency with VertexAnnotatorStar.
        """
        super().__init__(color=color, radius=radius)
        self.thickness = thickness

    @ensure_cv2_image_for_annotation
    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:
        """Draw a heart at every keypoint of every detection."""
        assert isinstance(scene, np.ndarray)
        if len(key_points) == 0:
            return scene

        half = self.radius // 2
        bgr = self.color.as_bgr()
        for xy in key_points.xy:
            for x, y in xy:
                # Normalize once so circles and triangle share the same
                # integer center (the original mixed int(y) and raw float y).
                cx, cy = int(x), int(y)

                # Two circles (top lobes of heart)
                cv2.circle(
                    img=scene,
                    center=(cx - half, cy),
                    radius=half,
                    color=bgr,
                    thickness=self.thickness,
                )
                cv2.circle(
                    img=scene,
                    center=(cx + half, cy),
                    radius=half,
                    color=bgr,
                    thickness=self.thickness,
                )

                # Triangle (bottom of heart)
                pts = np.array(
                    [
                        [cx - self.radius, cy],
                        [cx + self.radius, cy],
                        [cx, cy + self.radius * 2],
                    ],
                    np.int32,
                ).reshape((-1, 1, 2))

                if self.thickness == -1:
                    cv2.fillPoly(scene, [pts], bgr)
                else:
                    cv2.polylines(
                        scene,
                        [pts],
                        isClosed=True,
                        color=bgr,
                        thickness=self.thickness,
                    )

        return scene
|
src/vit_pose/vertex_annotator_star.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import supervision as sv
|
| 4 |
+
from supervision.annotators.base import ImageType
|
| 5 |
+
from supervision.keypoint.core import KeyPoints
|
| 6 |
+
from supervision.utils.conversion import ensure_cv2_image_for_annotation
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class VertexAnnotatorStar(sv.VertexAnnotator):
    """
    Extend the original class to draw star-shaped vertices.
    """

    def __init__(self, color=sv.Color.RED, radius=5, thickness=-1):
        super().__init__(color=color, radius=radius)
        self.thickness = thickness

    @ensure_cv2_image_for_annotation
    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:
        """Draw a 5-pointed star on an image."""
        assert isinstance(scene, np.ndarray)
        if len(key_points) == 0:
            return scene

        r = self.radius
        # Ten vertices (outer points and inner notches) of the star,
        # expressed as offsets from the keypoint center.
        offsets = (
            (0, -r),
            (r // 3, -(r // 3)),
            (r, -(r // 3)),
            (r // 2, r // 6),
            (2 * r // 3, r),
            (0, r // 2),
            (-(2 * r // 3), r),
            (-(r // 2), r // 6),
            (-r, -(r // 3)),
            (-(r // 3), -(r // 3)),
        )
        fill = self.thickness == -1
        bgr = self.color.as_bgr()

        for detection_xy in key_points.xy:
            for x, y in detection_xy:
                star = np.array(
                    [[x + dx, y + dy] for dx, dy in offsets],
                    np.int32,
                ).reshape((-1, 1, 2))

                if fill:
                    cv2.fillPoly(scene, [star], bgr)
                else:
                    cv2.polylines(
                        scene,
                        [star],
                        isClosed=True,
                        color=bgr,
                        thickness=self.thickness,
                    )

        return scene
|