|
|
import torch |
|
|
import gradio as gr |
|
|
import cv2 |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# Run inference on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MiDaS_small depth network from torch.hub, moved to `device` and switched to
# eval mode (both calls act on the module in place and return it).
midas = torch.hub.load("isl-org/MiDaS", "MiDaS_small", trust_repo=True)
midas.to(device).eval()

# Preprocessing that pairs with the small model: the hub's "transforms" entry
# point exposes it as `small_transform`.
transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
transform = transforms.small_transform
|
|
|
|
|
|
|
|
# Pose estimator: an Ultralytics YOLO model built from the "yolo11n-pose.pt"
# checkpoint name.
from ultralytics import YOLO

pose_model = YOLO(model="yolo11n-pose.pt")
|
|
|
|
|
def run_pose_depth(image: Image.Image):
    """Estimate a depth map and depth-annotated pose keypoints for one image.

    Args:
        image: Input picture as a PIL image in any mode (RGB, RGBA,
            grayscale, palette, ...). ``None`` is tolerated so that an empty
            upload does not raise.

    Returns:
        A ``(depth_pil, keypoints_list)`` tuple.

        ``depth_pil`` is the MiDaS prediction, min-max normalized, colorized
        with the MAGMA colormap and returned as an RGB PIL image with the
        same height and width as the input.

        ``keypoints_list`` contains one list per detected person; every
        entry is a dict with ``x`` and ``y`` (the keypoint position reported
        by the pose model), ``z`` (the raw, un-normalized MiDaS value sampled
        at that position, clamped to the image bounds) and ``confidence``.

        When ``image`` is ``None`` the result is ``(None, [])``.
    """
    if image is None:
        return None, []

    # PIL hands out RGB data. convert("RGB") also normalizes grayscale,
    # palette and RGBA inputs to a 3-channel array, so no cv2 channel
    # juggling (or a shape[2] lookup that fails on 2-D arrays) is needed.
    img_rgb = np.array(image.convert("RGB"))
    h, w = img_rgb.shape[:2]

    # --- Depth estimation (the MiDaS transform is fed the RGB array) ---
    input_batch = transform(img_rgb).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=(h, w),
            mode="bicubic",
            align_corners=False,
        )
    # Index the batch and channel axes explicitly: a bare squeeze() would
    # also drop a height or width of 1 and leave a 1-D depth map.
    depth_map = prediction[0, 0].cpu().numpy()

    # Min-max normalize for display; a constant map would divide by zero.
    depth_min = float(depth_map.min())
    depth_span = float(depth_map.max()) - depth_min
    if depth_span > 0:
        depth_norm = (depth_map - depth_min) / depth_span
    else:
        depth_norm = np.zeros_like(depth_map)

    depth_gray = (depth_norm * 255).astype(np.uint8)
    # applyColorMap returns BGR, while PIL interprets arrays as RGB.
    depth_bgr = cv2.applyColorMap(depth_gray, cv2.COLORMAP_MAGMA)
    depth_pil = Image.fromarray(cv2.cvtColor(depth_bgr, cv2.COLOR_BGR2RGB))

    # --- Pose estimation (Ultralytics treats numpy inputs as BGR) ---
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    results = pose_model(img_bgr)[0]

    keypoints_list = []
    if results.keypoints is not None:
        # One (num_keypoints, 3) array of x, y, confidence rows per person.
        for person_kpts in results.keypoints.data.cpu().numpy():
            joints = []
            for x, y, conf in person_kpts:
                # Clamp so keypoints on or past the border still index the map.
                px = int(np.clip(x, 0, w - 1))
                py = int(np.clip(y, 0, h - 1))
                joints.append(
                    {
                        "x": float(x),
                        "y": float(y),
                        "z": float(depth_map[py, px]),
                        "confidence": float(conf),
                    }
                )
            keypoints_list.append(joints)

    return depth_pil, keypoints_list
|
|
|
|
|
# Gradio UI: a single image upload goes in; the colorized depth image and the
# keypoint JSON produced by run_pose_depth come out.
iface = gr.Interface(
    fn=run_pose_depth,
    title="YOLOv11 Pose + MiDaS Depth",
    description="Upload image → Pose keypoints (X,Y,Z) with depth.",
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.JSON()],
)

# Start serving the interface as soon as this module is executed.
iface.launch()
|
|
|