# midas / app.py — Hugging Face Space by farid678 (commit 73dcbc4)
import torch
import gradio as gr
import cv2
import numpy as np
from PIL import Image
# -------- Load MiDaS model ----------
# All models are loaded once at module import so each Gradio request only
# runs inference. torch.hub.load downloads the repo/weights on first run.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# MiDaS_small: lightweight monocular depth-estimation variant.
midas = torch.hub.load("isl-org/MiDaS", "MiDaS_small", trust_repo=True)
midas.to(device)
midas.eval()
# The MiDaS repo ships its own preprocessing; small_transform matches
# the MiDaS_small model (resize + normalize + to-tensor, batched).
transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
transform = transforms.small_transform
# -------- Load YOLOv11 Pose Model ----------
from ultralytics import YOLO
pose_model = YOLO("yolo11n-pose.pt") # YOLOv11 pose (nano) — downloads weights on first use
def run_pose_depth(image: Image.Image):
    """Run MiDaS depth estimation and YOLO pose detection on one image.

    Args:
        image: Input image (PIL, any mode — RGBA/grayscale are normalized
            to 3-channel RGB).

    Returns:
        tuple: (depth_pil, keypoints_list) where depth_pil is a PIL image
        of the MAGMA-colorized depth map, and keypoints_list is a list of
        persons, each a list of dicts {"x", "y", "z", "confidence"} with
        z sampled from the raw depth map at the keypoint's pixel.
    """
    img = np.array(image)
    # Normalize to 3-channel RGB. PIL already yields RGB(A) channel order,
    # so no BGR swap is needed (the original BGR2RGB call corrupted colors);
    # grayscale images arrive as 2-D arrays and would break shape[2].
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    img_rgb = img
    h, w, _ = img_rgb.shape

    # --- Depth ---
    input_batch = transform(img_rgb).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        # Upsample the low-res prediction back to the input resolution so
        # keypoint (x, y) pixels index the depth map directly.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
    depth_map = prediction.cpu().numpy()

    # Normalize to [0, 1] for display; guard against a constant map,
    # which would otherwise divide by zero.
    d_min = float(depth_map.min())
    d_max = float(depth_map.max())
    d_range = d_max - d_min
    if d_range == 0.0:
        d_range = 1.0
    depth_norm = (depth_map - d_min) / d_range
    depth_img = cv2.applyColorMap((depth_norm * 255).astype(np.uint8),
                                  cv2.COLORMAP_MAGMA)
    # applyColorMap emits BGR; convert so the PIL image displays correctly.
    depth_pil = Image.fromarray(cv2.cvtColor(depth_img, cv2.COLOR_BGR2RGB))

    # --- Pose ---
    results = pose_model(img_rgb)[0]
    keypoints_list = []
    if results.keypoints is not None:
        for person in results.keypoints:
            # person.data is (1, K, 3): one row per keypoint of (x, y, conf).
            keypoints_array = person.data.cpu().numpy()[0]
            joints = []
            for x, y, conf in keypoints_array:
                # Clamp to image bounds before sampling the depth map —
                # detections can land fractionally outside the frame.
                px = int(np.clip(x, 0, w - 1))
                py = int(np.clip(y, 0, h - 1))
                # NOTE(review): MiDaS predicts relative inverse depth
                # (larger = closer); "z" here is that raw value, not meters.
                z = float(depth_map[py, px])
                joints.append({"x": float(x), "y": float(y),
                               "z": z, "confidence": float(conf)})
            keypoints_list.append(joints)
    return depth_pil, keypoints_list
# -------- Gradio UI ----------
# One image in; two outputs back: the colorized depth map and the JSON list
# of per-person keypoints produced by run_pose_depth.
iface = gr.Interface(
    fn=run_pose_depth,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),
        gr.JSON(),
    ],
    title="YOLOv11 Pose + MiDaS Depth",
    description="Upload image → Pose keypoints (X,Y,Z) with depth.",
)

iface.launch()