farid678 committed on
Commit
73dcbc4
·
verified ·
1 Parent(s): 89a69e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -12,19 +12,18 @@ midas.eval()
12
  transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
13
  transform = transforms.small_transform
14
 
15
- # -------- Load YOLOv8 Pose Model ----------
16
  from ultralytics import YOLO
17
- pose_model = YOLO("yolov8n-pose.pt") # small model for speed
18
 
19
  def run_pose_depth(image: Image.Image):
20
- # Convert PIL to OpenCV RGB
21
  img = np.array(image)
22
  if img.shape[2] == 4:
23
  img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
24
  img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
25
  h, w, _ = img_rgb.shape
26
 
27
- # --- 1. Depth Estimation ---
28
  input_batch = transform(img_rgb).to(device)
29
  with torch.no_grad():
30
  prediction = midas(input_batch)
@@ -35,48 +34,35 @@ def run_pose_depth(image: Image.Image):
35
  align_corners=False
36
  ).squeeze()
37
  depth_map = prediction.cpu().numpy()
38
-
39
- # Normalize for visualization
40
  depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
41
  depth_img = (depth_norm * 255).astype(np.uint8)
42
  depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
43
  depth_pil = Image.fromarray(depth_img)
44
 
45
- # --- 2. Pose Estimation ---
46
- results = pose_model(img_rgb)[0] # first image
47
  keypoints_list = []
48
 
49
  if results.keypoints is not None:
50
  for person in results.keypoints:
 
 
51
  joints = []
52
- person_array = person.cpu().numpy() # convert tensor to numpy
53
- for kp in person_array:
54
- kp = np.squeeze(kp) # flatten if necessary
55
- if len(kp) < 3:
56
- continue
57
  x, y, conf = kp
58
- # Clamp x,y
59
  px = int(np.clip(x, 0, w-1))
60
  py = int(np.clip(y, 0, h-1))
61
- z = float(depth_map[py, px]) # Z from MiDaS
62
  joints.append({"x": float(x), "y": float(y), "z": z, "confidence": float(conf)})
63
  keypoints_list.append(joints)
64
 
65
- # --- 3. Print keypoints in console ---
66
- for i, joints in enumerate(keypoints_list):
67
- print(f"Person {i+1}:")
68
- for j, kp in enumerate(joints):
69
- print(f" Joint {j}: x={kp['x']:.2f}, y={kp['y']:.2f}, z={kp['z']:.4f}, conf={kp['confidence']:.2f}")
70
-
71
  return depth_pil, keypoints_list
72
 
73
- # --- Gradio Interface ---
74
  iface = gr.Interface(
75
  fn=run_pose_depth,
76
  inputs=gr.Image(type="pil"),
77
  outputs=[gr.Image(type="pil"), gr.JSON()],
78
- title="YOLO Pose + MiDaS Depth",
79
- description="Upload an image to see the depth map and keypoints X,Y,Z for each person."
80
  )
81
 
82
  iface.launch()
 
12
  transforms = torch.hub.load("isl-org/MiDaS", "transforms", trust_repo=True)
13
  transform = transforms.small_transform
14
 
15
+ # -------- Load YOLOv11 Pose Model ----------
16
  from ultralytics import YOLO
17
+ pose_model = YOLO("yolo11n-pose.pt") # YOLOv11 pose
18
 
def run_pose_depth(image: Image.Image):
    """Estimate per-person pose keypoints with depth for an uploaded image.

    Runs MiDaS monocular depth estimation and YOLO11 pose detection on the
    same image, then samples the depth map at each keypoint location so every
    joint carries an (x, y, z) triple.

    Args:
        image: Input PIL image (grayscale, RGB, or RGBA).

    Returns:
        tuple: ``(depth_pil, keypoints_list)`` where ``depth_pil`` is a
        colorized depth map (PIL Image) and ``keypoints_list`` has one entry
        per detected person, each a list of dicts with keys
        ``"x"``, ``"y"``, ``"z"``, ``"confidence"``.
    """
    img = np.array(image)
    # Normalize to 3-channel RGB. PIL arrays are already RGB, so no BGR<->RGB
    # swap is needed (the original BGR2RGB call silently swapped the channels
    # fed to both models). Grayscale uploads arrive as 2-D arrays, which the
    # original `img.shape[2]` check crashed on.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    img_rgb = img
    h, w, _ = img_rgb.shape

    # --- Depth (MiDaS) ---
    input_batch = transform(img_rgb).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        # NOTE(review): this interpolate call was truncated in the diff;
        # reconstructed from the canonical MiDaS hub example — confirm.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
    depth_map = prediction.cpu().numpy()

    # Normalize to [0, 1] for visualization; guard against a constant depth
    # map (max == min), which would otherwise divide by zero.
    span = depth_map.max() - depth_map.min()
    depth_norm = (depth_map - depth_map.min()) / (span if span > 0 else 1.0)
    depth_img = (depth_norm * 255).astype(np.uint8)
    depth_img = cv2.applyColorMap(depth_img, cv2.COLORMAP_MAGMA)
    # applyColorMap returns BGR; convert so the PIL image displays correctly.
    depth_img = cv2.cvtColor(depth_img, cv2.COLOR_BGR2RGB)
    depth_pil = Image.fromarray(depth_img)

    # --- Pose (YOLO11) ---
    results = pose_model(img_rgb)[0]
    keypoints_list = []

    if results.keypoints is not None:
        for person in results.keypoints:
            # person.data is a (1, num_joints, 3) tensor of x, y, confidence.
            keypoints_array = person.data.cpu().numpy()[0]
            joints = []
            for x, y, conf in keypoints_array:
                # Clamp to image bounds before indexing the depth map.
                px = int(np.clip(x, 0, w - 1))
                py = int(np.clip(y, 0, h - 1))
                z = float(depth_map[py, px])  # relative (inverse) MiDaS depth
                joints.append(
                    {
                        "x": float(x),
                        "y": float(y),
                        "z": z,
                        "confidence": float(conf),
                    }
                )
            keypoints_list.append(joints)

    return depth_pil, keypoints_list
# --- Gradio UI: one image in; colorized depth map + keypoint JSON out ---
iface = gr.Interface(
    fn=run_pose_depth,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil"),
        gr.JSON(),
    ],
    title="YOLOv11 Pose + MiDaS Depth",
    description="Upload image Pose keypoints (X,Y,Z) with depth.",
)

iface.launch()