| | import gradio as gr |
| | import cv2 |
| | import mediapipe as mp |
| | import numpy as np |
| | from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation |
| | import torch |
| |
|
| | |
# MediaPipe handles used by estimate_pose: the drawing helpers and the pose
# solution module.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# A single Pose object is created at import time and reused for every call.
# It is configured for still images (static_image_mode=True) with
# model_complexity=2.
pose = mp_pose.Pose(model_complexity=2, static_image_mode=True)
| |
|
| | |
# The preprocessor and the segmentation network are loaded once, at import
# time, from the same pretrained checkpoint, so the name is spelled out once.
# NOTE(review): recent transformers releases deprecate SegformerFeatureExtractor
# in favour of SegformerImageProcessor -- confirm against the pinned version.
_SEGFORMER_CHECKPOINT = "nvidia/segformer-b0-finetuned-ade-512-512"

feature_extractor = SegformerFeatureExtractor.from_pretrained(_SEGFORMER_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(_SEGFORMER_CHECKPOINT)
| |
|
| | |
# Fill colour for each named part, as a 3-tuple of 0-255 channel values.
# The keys must match those of PART_LABELS, which segment_image looks up by
# the same names.
# NOTE(review): the channel order is whatever the painted image uses --
# presumably RGB when the image comes from Gradio; confirm.
PART_COLORS = dict(
    head=(0, 255, 0),
    shoulders=(255, 0, 0),
    upper_body=(0, 0, 255),
    arms=(255, 255, 0),
    lower_body=(255, 0, 255),
)
| |
|
# Segmentation class ids that are painted with each part's colour.
# NOTE(review): the checkpoint loaded in this file is an ADE20K scene-parsing
# model, so these ids most likely name scene categories rather than body
# parts -- verify them against model.config.id2label before trusting the
# part names.
PART_LABELS = dict(
    head=[0],
    shoulders=[2],
    upper_body=[3, 4],
    arms=[5, 6],
    lower_body=[7, 8],
)
| |
|
def segment_image(image):
    """Return a colour-coded copy of ``image`` based on the segmentation model.

    The module-level SegFormer model predicts a class id per pixel; the
    prediction is resized to the input resolution with nearest-neighbour
    interpolation, and every pixel whose class id is listed in
    ``PART_LABELS`` is painted with the matching ``PART_COLORS`` entry.
    Pixels whose class id is not listed stay zero (black).

    Args:
        image: NumPy image array indexed as ``(row, column, channel)``.
            Assumed to have three channels, because each colour is a 3-tuple
            -- TODO confirm for non-RGB uploads.

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    inputs = feature_extractor(images=image, return_tensors="pt")

    # Inference only: no_grad avoids building an autograd graph for the
    # forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # argmax over the class axis gives int64 ids, which cv2.resize does not
    # accept. uint16 is a depth OpenCV handles and is wide enough for any
    # realistic number of classes. squeeze(0) drops only the batch axis.
    label_map = (
        torch.argmax(logits, dim=1).squeeze(0).cpu().numpy().astype(np.uint16)
    )

    # cv2.resize takes (width, height); nearest-neighbour keeps the values
    # valid class ids instead of blending neighbouring ids.
    height, width = image.shape[:2]
    label_map = cv2.resize(
        label_map, (width, height), interpolation=cv2.INTER_NEAREST
    )

    segmented_image = np.zeros_like(image)
    for part, color in PART_COLORS.items():
        segmented_image[np.isin(label_map, PART_LABELS[part])] = color

    return segmented_image
| |
|
| |
|
def estimate_pose(image):
    """Detect pose landmarks on ``image`` and produce its segmentation overlay.

    Args:
        image: RGB NumPy image as delivered by the Gradio ``Image`` input
            (``type="numpy"``), or ``None`` when nothing was uploaded.

    Returns:
        A ``(pose_image, segmented_image)`` pair. ``pose_image`` is a copy of
        the input with landmarks and connections drawn on it, or the
        unmodified input when no landmarks were detected.
        ``segmented_image`` is the result of ``segment_image(image)``.
        Both are ``None`` when ``image`` is ``None``.
    """
    # Gradio calls the function with None if the user submits without an
    # image; return empty outputs instead of raising inside the pipeline.
    if image is None:
        return None, None

    # The Gradio numpy image is already RGB, which is what MediaPipe expects.
    # Running a BGR<->RGB swap here would feed the detector swapped channels.
    results = pose.process(image)

    # The segmentation does not depend on the pose result, so compute it once
    # for both return paths.
    segmented_image = segment_image(image)

    if not results.pose_landmarks:
        return image, segmented_image

    # Draw on a copy so the caller's array is left untouched.
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(
            color=(0, 255, 0), thickness=2, circle_radius=2
        ),
        connection_drawing_spec=mp_drawing.DrawingSpec(
            color=(0, 0, 255), thickness=2, circle_radius=2
        ),
    )

    return annotated_image, segmented_image
| |
|
| | |
# Gradio UI: one uploaded image in, two images out (pose overlay and
# segmentation overlay), in the order estimate_pose returns them.
_upload_input = gr.Image(type="numpy", label="Upload an Image")
_result_outputs = [
    gr.Image(type="numpy", label="Pose Landmarks Image"),
    gr.Image(type="numpy", label="Segmented Body Parts"),
]

interface = gr.Interface(
    fn=estimate_pose,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="Human Pose Estimation and Segmentation",
    description="Upload an image to detect and visualize human pose landmarks and segment body parts (head, shoulders, upper body, arms, lower body) with different colors.",
)


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()
| |
|