Spaces:

farid678
/

midas

Runtime error

App Files Files Community

midas / app.py

farid678

Update app.py

73dcbc4 verified 2 months ago

raw

history blame contribute delete

2.27 kB

	import torch
	import gradio as gr
	import cv2
	import numpy as np
	from PIL import Image

	# -------- MiDaS depth model ----------
	# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# Fetch the "MiDaS_small" entry point from torch hub, move it to the chosen
	# device and switch it to evaluation mode (both calls return the module).
	midas = torch.hub.load(
	    repo_or_dir="isl-org/MiDaS", model="MiDaS_small", trust_repo=True
	).to(device).eval()
	# The same hub repo exposes its preprocessing pipelines under "transforms";
	# `small_transform` is the one this app uses alongside MiDaS_small.
	transforms = torch.hub.load(
	    repo_or_dir="isl-org/MiDaS", model="transforms", trust_repo=True
	)
	transform = transforms.small_transform

	# -------- YOLOv11 pose model ----------
	from ultralytics import YOLO

	# Pose-estimation weights, loaded once at import time and reused per request.
	pose_model = YOLO("yolo11n-pose.pt")  # YOLOv11 pose

	def _estimate_depth(rgb):
	    """Run MiDaS on an RGB array and return a 2-D depth map matching its height and width."""
	    batch = transform(rgb).to(device)
	    with torch.no_grad():
	        pred = midas(batch)
	        # Resize the network output back to the input resolution.
	        pred = torch.nn.functional.interpolate(
	            pred.unsqueeze(1),
	            size=rgb.shape[:2],
	            mode="bicubic",
	            align_corners=False,
	        )
	    # Index the batch/channel axes explicitly: a bare squeeze() would also
	    # drop a height or width axis of size 1.
	    return pred[0, 0].cpu().numpy()


	def _colorize_depth(depth_map):
	    """Scale a depth map to 0-255 and render it with the MAGMA colormap as an RGB PIL image."""
	    lowest = float(depth_map.min())
	    span = float(depth_map.max()) - lowest
	    if span > 0:
	        scaled = (depth_map - lowest) / span
	    else:
	        # Flat (or non-finite) prediction: avoid 0/0 -> NaN before the uint8 cast.
	        scaled = np.zeros_like(depth_map)
	    gray = (scaled * 255).astype(np.uint8)
	    colored_bgr = cv2.applyColorMap(gray, cv2.COLORMAP_MAGMA)
	    # OpenCV colormaps come back in BGR order; PIL expects RGB.
	    return Image.fromarray(cv2.cvtColor(colored_bgr, cv2.COLOR_BGR2RGB))


	def _keypoints_with_depth(result, depth_map):
	    """Return, per detected person, a list of joint dicts with x, y, sampled depth z and confidence."""
	    people = []
	    if result.keypoints is None:
	        return people

	    height, width = depth_map.shape
	    for person in result.keypoints:
	        # Each per-person entry carries a leading axis of size 1; drop it to
	        # get one (x, y, confidence) row per joint.
	        rows = person.data.cpu().numpy()[0]
	        joints = []
	        for x, y, conf in rows:
	            # Clamp to the image so the depth lookup can never go out of bounds.
	            px = int(np.clip(x, 0, width - 1))
	            py = int(np.clip(y, 0, height - 1))
	            joints.append(
	                {
	                    "x": float(x),
	                    "y": float(y),
	                    "z": float(depth_map[py, px]),
	                    "confidence": float(conf),
	                }
	            )
	        people.append(joints)
	    return people


	def run_pose_depth(image: Image.Image):
	    """Estimate a depth map and depth-annotated pose keypoints for one image.

	    Args:
	        image: The uploaded picture as a PIL image (any mode; it is converted
	            to RGB internally).

	    Returns:
	        A ``(depth_pil, keypoints_list)`` tuple. ``depth_pil`` is the
	        MAGMA-coloured depth rendering as an RGB PIL image.
	        ``keypoints_list`` holds one list per detected person, each a list of
	        ``{"x", "y", "z", "confidence"}`` dicts where ``z`` is the raw MiDaS
	        prediction sampled at the (clamped) keypoint pixel.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    # PIL already yields RGB; convert() also normalises RGBA, grayscale and
	    # palette inputs to three channels.
	    img_rgb = np.array(image.convert("RGB"))

	    # --- Depth --- (MiDaS transform is fed the RGB array)
	    depth_map = _estimate_depth(img_rgb)
	    depth_pil = _colorize_depth(depth_map)

	    # --- Pose --- Ultralytics treats numpy input as BGR, so swap channels
	    # explicitly for the detector only.
	    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
	    results = pose_model(img_bgr)[0]
	    keypoints_list = _keypoints_with_depth(results, depth_map)

	    return depth_pil, keypoints_list

	# Web UI: one PIL image in; the depth rendering plus the keypoint JSON out.
	iface = gr.Interface(
	    run_pose_depth,
	    gr.Image(type="pil"),
	    [gr.Image(type="pil"), gr.JSON()],
	    title="YOLOv11 Pose + MiDaS Depth",
	    description="Upload image → Pose keypoints (X,Y,Z) with depth.",
	)

	# Start serving the interface.
	iface.launch()