Spaces:

farid678
/

midas

Runtime error

App Files Community

farid678 committed on Dec 16, 2025

Commit

89a69e2

verified ·

1 Parent(s): 0c3dc16

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -9

app.py CHANGED Viewed

@@ -13,9 +13,8 @@ transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
 transform = transforms.small_transform
 # -------- Load YOLOv8 Pose Model ----------
-# Using ultralytics hub (needs internet)
 from ultralytics import YOLO
-pose_model = YOLO("yolov8n-pose.pt")  # small model, fast
 def run_pose_depth(image: Image.Image):
     # Convert PIL to OpenCV RGB
@@ -25,7 +24,7 @@ def run_pose_depth(image: Image.Image):
     img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     h, w, _ = img_rgb.shape
-    # --- 1. Depth ---
     input_batch = transform(img_rgb).to(device)
     with torch.no_grad():
         prediction = midas(input_batch)
@@ -36,6 +35,8 @@ def run_pose_depth(image: Image.Image):
         align_corners=False
     ).squeeze()
     depth_map = prediction.cpu().numpy()
     depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
     depth_img = (depth_norm * 255).astype(np.uint8)
     depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
@@ -48,17 +49,20 @@ def run_pose_depth(image: Image.Image):
     if results.keypoints is not None:
         for person in results.keypoints:
             joints = []
-            for kp in person:  # kp = [x, y, confidence]
-                print(kp)
                 x, y, conf = kp
                 # Clamp x,y
                 px = int(np.clip(x, 0, w-1))
                 py = int(np.clip(y, 0, h-1))
-                z = depth_map[py, px]  # Z from MiDaS
-                joints.append({"x": float(x), "y": float(y), "z": float(z), "confidence": float(conf)})
             keypoints_list.append(joints)
-    # --- 3. Print keypoints ---
     for i, joints in enumerate(keypoints_list):
         print(f"Person {i+1}:")
         for j, kp in enumerate(joints):
@@ -72,7 +76,7 @@ iface = gr.Interface(
     inputs=gr.Image(type="pil"),
     outputs=[gr.Image(type="pil"), gr.JSON()],
     title="YOLO Pose + MiDaS Depth",
-    description="Upload an image: pose keypoints with X,Y,Z from MiDaS depth."
 )
 iface.launch()

 transform = transforms.small_transform
 # -------- Load YOLOv8 Pose Model ----------
 from ultralytics import YOLO
+pose_model = YOLO("yolov8n-pose.pt")  # small model for speed
 def run_pose_depth(image: Image.Image):
     # Convert PIL to OpenCV RGB
     img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     h, w, _ = img_rgb.shape
+    # --- 1. Depth Estimation ---
     input_batch = transform(img_rgb).to(device)
     with torch.no_grad():
         prediction = midas(input_batch)
         align_corners=False
     ).squeeze()
     depth_map = prediction.cpu().numpy()
+    # Normalize for visualization
     depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
     depth_img = (depth_norm * 255).astype(np.uint8)
     depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
     if results.keypoints is not None:
         for person in results.keypoints:
             joints = []
+            person_array = person.cpu().numpy()  # convert tensor to numpy
+            for kp in person_array:
+                kp = np.squeeze(kp)  # flatten if necessary
+                if len(kp) < 3:
+                    continue
                 x, y, conf = kp
                 # Clamp x,y
                 px = int(np.clip(x, 0, w-1))
                 py = int(np.clip(y, 0, h-1))
+                z = float(depth_map[py, px])  # Z from MiDaS
+                joints.append({"x": float(x), "y": float(y), "z": z, "confidence": float(conf)})
             keypoints_list.append(joints)
+    # --- 3. Print keypoints in console ---
     for i, joints in enumerate(keypoints_list):
         print(f"Person {i+1}:")
         for j, kp in enumerate(joints):
     inputs=gr.Image(type="pil"),
     outputs=[gr.Image(type="pil"), gr.JSON()],
     title="YOLO Pose + MiDaS Depth",
+    description="Upload an image to see the depth map and keypoints X,Y,Z for each person."
 )
 iface.launch()