import torch
import gradio as gr
import cv2
import numpy as np
from PIL import Image
# -------- Load MiDaS model ----------
# Pick GPU when available; both models and inputs are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.hub fetches the MiDaS_small weights (downloads on first run).
midas = torch.hub.load("isl-org/MiDaS", "MiDaS_small", trust_repo=True)
midas.to(device)
# Inference mode: freezes dropout / batch-norm statistics.
midas.eval()
# The companion transforms repo supplies the preprocessing pipeline;
# small_transform matches the MiDaS_small input resolution.
transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
transform = transforms.small_transform
# -------- Load YOLOv11 Pose Model ----------
from ultralytics import YOLO
# Loads local weights file if present, otherwise ultralytics downloads it.
pose_model = YOLO("yolo11n-pose.pt") # YOLOv11 pose
def run_pose_depth(image: Image.Image):
    """Estimate per-keypoint depth for every person in an image.

    Runs MiDaS monocular depth estimation over the whole frame, runs YOLO
    pose detection, then samples the depth map at each detected keypoint.

    Args:
        image: Input PIL image in any mode (RGB, RGBA, L, P, ...).

    Returns:
        tuple: (depth visualization as a PIL Image, list of per-person
        keypoint lists; each keypoint is a dict with keys
        "x", "y", "z", "confidence").
    """
    # PIL arrays are already RGB-ordered.  convert("RGB") normalizes every
    # mode (RGBA, grayscale, palette) to 3 channels.  The original code
    # applied cv2.COLOR_BGR2RGB to this RGB array, silently swapping the
    # red/blue channels fed to both models, and crashed on 2-D grayscale
    # input at `img.shape[2]`.
    img_rgb = np.array(image.convert("RGB"))
    h, w, _ = img_rgb.shape

    # --- Depth estimation (MiDaS) ---
    input_batch = transform(img_rgb).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        # Upsample the low-resolution prediction back to the input size.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
    depth_map = prediction.cpu().numpy()

    # Normalize to [0, 1]; guard against a constant map (division by zero).
    depth_range = depth_map.max() - depth_map.min()
    if depth_range > 0:
        depth_norm = (depth_map - depth_map.min()) / depth_range
    else:
        depth_norm = np.zeros_like(depth_map)
    depth_img = cv2.applyColorMap(
        (depth_norm * 255).astype(np.uint8), cv2.COLORMAP_MAGMA
    )
    # applyColorMap emits BGR; swap to RGB so the PIL image shows true colors.
    depth_pil = Image.fromarray(cv2.cvtColor(depth_img, cv2.COLOR_BGR2RGB))

    # --- Pose estimation (YOLO) ---
    results = pose_model(img_rgb)[0]
    keypoints_list = []
    if results.keypoints is not None:
        for person in results.keypoints:
            # person.data is a (1, K, 3) tensor of (x, y, confidence) rows.
            keypoints_array = person.data.cpu().numpy()[0]
            joints = []
            for x, y, conf in keypoints_array:
                # Clamp coordinates to image bounds before sampling depth;
                # detectors can emit points slightly outside the frame.
                px = int(np.clip(x, 0, w - 1))
                py = int(np.clip(y, 0, h - 1))
                joints.append({
                    "x": float(x),
                    "y": float(y),
                    "z": float(depth_map[py, px]),
                    "confidence": float(conf),
                })
            keypoints_list.append(joints)
    return depth_pil, keypoints_list
# Build the Gradio UI: one image input, two outputs
# (colorized depth map + JSON keypoints with depth), then start the server.
_TITLE = "YOLOv11 Pose + MiDaS Depth"
_DESCRIPTION = "Upload image → Pose keypoints (X,Y,Z) with depth."

iface = gr.Interface(
    fn=run_pose_depth,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.JSON()],
    title=_TITLE,
    description=_DESCRIPTION,
)
iface.launch()