| """ |
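| Simplified human parsing built from pose keypoints. |
| This replaces the MediaPipe-based parsing. Note that SimpleHumanParser does not load a pretrained segmentation model: it paints coarse body-part regions (head, hair, torso, limbs) from the supplied pose keypoints. |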
| """ |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from torchvision import transforms |
| import numpy as np |
| from PIL import Image |
| import os |
| import gdown |
|
|
class SimpleHumanParser:
    """
    Heuristic human parser that paints coarse body-part labels from pose keypoints.

    No network is evaluated by this class: the head and hair are axis-aligned
    boxes around the nose, the torso is an enlarged shoulder/hip quadrilateral
    and the limbs are thick line segments between keypoints.
    For production use, consider integrating SCHP or Graphonomy.

    Labels written into the returned map:
        0   background
        1   hair
        3   torso
        4   head
        5   segments 5-6 and 6-7   (chain starting at the left shoulder)
        6   segments 2-3 and 3-4   (chain starting at the right shoulder)
        9   segments 8-9 and 9-10  (chain starting at the right hip)
        12  segments 11-12, 12-13  (chain starting at the left hip)

    A keypoint counts as detected only when every coordinate is > 0.
    """

    _LABEL_HAIR = 1
    _LABEL_TORSO = 3
    _LABEL_HEAD = 4

    # Keypoint rows forming the torso outline, in polygon order:
    # right shoulder, left shoulder, left hip, right hip.
    _TORSO_CORNERS = (2, 5, 11, 8)

    # (start keypoint, end keypoint, label) in drawing order; later segments
    # overwrite earlier ones where they overlap.
    # NOTE(review): indices presumably follow the 18-point OpenPose/COCO
    # layout (3/4 elbow/wrist, 9/10 knee/ankle, ...) - confirm with the caller.
    _LIMB_SEGMENTS = (
        (2, 3, 6),
        (3, 4, 6),
        (5, 6, 5),
        (6, 7, 5),
        (8, 9, 9),
        (9, 10, 9),
        (11, 12, 12),
        (12, 13, 12),
    )

    _HEAD_RADIUS_RATIO = 0.08  # head/hair box half-width as a fraction of image height
    _LIMB_WIDTH_RATIO = 0.03   # limb stroke width as a fraction of image height
    _MIN_LIMB_WIDTH = 10       # lower bound on limb stroke width, in pixels
    _TORSO_SCALE = 1.2         # torso polygon is enlarged about its centroid

    def __init__(self):
        """
        Pick a torch device (CUDA, then MPS, then CPU) and build the input transform.

        Neither ``self.device`` nor ``self.transform`` is read by the other
        methods of this class; they are kept for callers that rely on them.
        """
        # Older torch builds have no ``torch.backends.mps`` attribute at all.
        mps_backend = getattr(torch.backends, 'mps', None)
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif mps_backend is not None and mps_backend.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')

        # Mean/std are the commonly used ImageNet normalisation statistics.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def parse_image(self, image, pose_data):
        """
        Generate a coarse semantic label map for the person.

        Regions are painted in this order, each overwriting the previous ones
        where they overlap: head box, torso polygon, limb segments, hair box.

        Args:
            image: PIL Image (anything ``np.asarray`` turns into an array whose
                first two dimensions are height and width also works).
            pose_data: array-like of pose keypoints, shape (18, 2), each row
                ``(x, y)`` in pixels. Rows 0 and 2-13 are read. A keypoint with
                any coordinate <= 0 is treated as missing.

        Returns:
            numpy uint8 array of shape (H, W) with class labels.
        """
        pose = np.asarray(pose_data)
        height, width = np.asarray(image).shape[:2]
        parse_map = np.zeros((height, width), dtype=np.uint8)

        radius = int(height * self._HEAD_RADIUS_RATIO)
        # Use the same visibility rule as every other keypoint; checking only x
        # let a negative y through, which wrapped the slice around the image.
        nose = pose[0].astype(int) if self._is_visible(pose[0]) else None

        if nose is not None:
            self._fill_box(
                parse_map,
                nose[0] - radius, nose[1] - 2 * radius,
                nose[0] + radius, nose[1] + radius // 2,
                self._LABEL_HEAD,
            )

        self._draw_torso(parse_map, pose, width, height)

        for start, end, label in self._LIMB_SEGMENTS:
            if self._is_visible(pose[start]) and self._is_visible(pose[end]):
                self._draw_limb(parse_map, pose[start], pose[end], label, width, height)

        # Hair goes last so it covers the upper part of the head box.
        if nose is not None:
            self._fill_box(
                parse_map,
                nose[0] - radius, nose[1] - 3 * radius,
                nose[0] + radius, nose[1] - radius,
                self._LABEL_HAIR,
            )

        return parse_map

    @staticmethod
    def _is_visible(point):
        """Return True when every coordinate of the keypoint is strictly positive."""
        return bool(np.all(np.asarray(point) > 0))

    @staticmethod
    def _fill_box(parse_map, x1, y1, x2, y2, label):
        """Fill the box [x1, x2) x [y1, y2) with label, clipped to the map; skip empty boxes."""
        map_h, map_w = parse_map.shape
        x1, x2 = max(0, x1), min(map_w, x2)
        y1, y2 = max(0, y1), min(map_h, y2)
        # Checking emptiness explicitly keeps a negative bound from being
        # interpreted by numpy as "count from the end".
        if x2 > x1 and y2 > y1:
            parse_map[y1:y2, x1:x2] = label

    def _draw_torso(self, parse_map, pose, w, h):
        """Paint the enlarged shoulder/hip quadrilateral when all four corners are detected."""
        corners = pose[list(self._TORSO_CORNERS)].astype(np.int32)
        if not (corners > 0).all():
            return

        # Grow the outline about its centroid so it covers more than the bare
        # keypoint quadrilateral.
        center = corners.mean(axis=0)
        scaled = ((corners - center) * self._TORSO_SCALE + center).astype(np.int32)

        from PIL import ImageDraw
        mask_img = Image.new('L', (w, h), 0)
        ImageDraw.Draw(mask_img).polygon(
            [(int(x), int(y)) for x, y in scaled], fill=self._LABEL_TORSO
        )
        parse_map[np.array(mask_img) == self._LABEL_TORSO] = self._LABEL_TORSO

    def _draw_limb(self, parse_map, pt1, pt2, label, w, h):
        """
        Draw a limb (line with thickness) on the parse map.

        Args:
            parse_map: (h, w) label array, modified in place.
            pt1, pt2: segment end points as ``(x, y)``; truncated to integers.
            label: value written to every pixel covered by the stroke.
            w, h: width and height of ``parse_map`` in pixels.
        """
        from PIL import ImageDraw
        start = tuple(int(v) for v in np.asarray(pt1).astype(int))
        end = tuple(int(v) for v in np.asarray(pt2).astype(int))
        thickness = max(self._MIN_LIMB_WIDTH, int(h * self._LIMB_WIDTH_RATIO))

        # Rasterise on a scratch image, then copy only the covered pixels so
        # the rest of parse_map is left untouched.
        mask_img = Image.new('L', (w, h), 0)
        ImageDraw.Draw(mask_img).line([start, end], fill=label, width=thickness)
        parse_map[np.array(mask_img) == label] = label
|
|