Spaces:

farid678
/

midas

Runtime error

App Files Files Community

farid678 committed on Dec 16, 2025

Commit

73dcbc4

verified ·

1 Parent(s): 89a69e2

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -24

app.py CHANGED Viewed

@@ -12,19 +12,18 @@ midas.eval()
 transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
 transform = transforms.small_transform
-# -------- Load YOLOv8 Pose Model ----------
 from ultralytics import YOLO
-pose_model = YOLO("yolov8n-pose.pt")  # small model for speed
 def run_pose_depth(image: Image.Image):
-    # Convert PIL to OpenCV RGB
     img = np.array(image)
     if img.shape[2] == 4:
         img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
     img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     h, w, _ = img_rgb.shape
-    # --- 1. Depth Estimation ---
     input_batch = transform(img_rgb).to(device)
     with torch.no_grad():
         prediction = midas(input_batch)
@@ -35,48 +34,35 @@ def run_pose_depth(image: Image.Image):
         align_corners=False
     ).squeeze()
     depth_map = prediction.cpu().numpy()
-    # Normalize for visualization
     depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
     depth_img = (depth_norm * 255).astype(np.uint8)
     depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
     depth_pil = Image.fromarray(depth_img)
-    # --- 2. Pose Estimation ---
-    results = pose_model(img_rgb)[0]  # first image
     keypoints_list = []
     if results.keypoints is not None:
         for person in results.keypoints:
             joints = []
-            person_array = person.cpu().numpy()  # convert tensor to numpy
-            for kp in person_array:
-                kp = np.squeeze(kp)  # flatten if necessary
-                if len(kp) < 3:
-                    continue
                 x, y, conf = kp
-                # Clamp x,y
                 px = int(np.clip(x, 0, w-1))
                 py = int(np.clip(y, 0, h-1))
-                z = float(depth_map[py, px])  # Z from MiDaS
                 joints.append({"x": float(x), "y": float(y), "z": z, "confidence": float(conf)})
             keypoints_list.append(joints)
-    # --- 3. Print keypoints in console ---
-    for i, joints in enumerate(keypoints_list):
-        print(f"Person {i+1}:")
-        for j, kp in enumerate(joints):
-            print(f"  Joint {j}: x={kp['x']:.2f}, y={kp['y']:.2f}, z={kp['z']:.4f}, conf={kp['confidence']:.2f}")
     return depth_pil, keypoints_list
-# --- Gradio Interface ---
 iface = gr.Interface(
     fn=run_pose_depth,
     inputs=gr.Image(type="pil"),
     outputs=[gr.Image(type="pil"), gr.JSON()],
-    title="YOLO Pose + MiDaS Depth",
-    description="Upload an image to see the depth map and keypoints X,Y,Z for each person."
 )
 iface.launch()

 transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
 transform = transforms.small_transform
+# -------- Load YOLOv11 Pose Model ----------
 from ultralytics import YOLO
+pose_model = YOLO("yolo11n-pose.pt")  # YOLOv11 pose
 def run_pose_depth(image: Image.Image):
     img = np.array(image)
     if img.shape[2] == 4:
         img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
     img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     h, w, _ = img_rgb.shape
+    # --- Depth ---
     input_batch = transform(img_rgb).to(device)
     with torch.no_grad():
         prediction = midas(input_batch)
         align_corners=False
     ).squeeze()
     depth_map = prediction.cpu().numpy()
     depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
     depth_img = (depth_norm * 255).astype(np.uint8)
     depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
     depth_pil = Image.fromarray(depth_img)
+    results = pose_model(img_rgb)[0]
     keypoints_list = []
     if results.keypoints is not None:
         for person in results.keypoints:
+            # get keypoints as numpy array
+            keypoints_array = person.data.cpu().numpy()[0]
             joints = []
+            for kp in keypoints_array:
                 x, y, conf = kp
                 px = int(np.clip(x, 0, w-1))
                 py = int(np.clip(y, 0, h-1))
+                z = float(depth_map[py, px])
                 joints.append({"x": float(x), "y": float(y), "z": z, "confidence": float(conf)})
             keypoints_list.append(joints)
     return depth_pil, keypoints_list
 iface = gr.Interface(
     fn=run_pose_depth,
     inputs=gr.Image(type="pil"),
     outputs=[gr.Image(type="pil"), gr.JSON()],
+    title="YOLOv11 Pose + MiDaS Depth",
+    description="Upload image → Pose keypoints (X,Y,Z) with depth."
 )
 iface.launch()