Spaces:

gpue
/

foundationpose

Sleeping

Georg Claude Sonnet 4.5 commited on Jan 29

Commit

f7e2564

1 Parent(s): 16d53ca

Update test to verify mask generation and add psutil dependency

Test improvements:
- Call Gradio API directly to receive all 3 outputs (text, viz, mask)
- Verify mask is returned (not None)
- Verify mask shape and dtype are correct
- Upload both RGB + depth images to API
- Check for successful estimation in output text

Bug fix:
- Add psutil==6.1.1 to Dockerfile.base dependencies
- Resolves: "No module named 'psutil'" import error
- Required by FoundationPose modules

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (2) hide show

Dockerfile.base +1 -0
tests/test_estimator.py +79 -34

Dockerfile.base CHANGED Viewed

@@ -71,6 +71,7 @@ RUN pip install --no-cache-dir \
     transformations==2024.6.1 \
     pyyaml==6.0.1 \
     joblib==1.4.0 \
     && pip cache purge
 # Note: nvdiffrast will be built in final Dockerfile on HuggingFace (needs GPU)

     transformations==2024.6.1 \
     pyyaml==6.0.1 \
     joblib==1.4.0 \
+    psutil==6.1.1 \
     && pip cache purge
 # Note: nvdiffrast will be built in final Dockerfile on HuggingFace (needs GPU)

tests/test_estimator.py CHANGED Viewed

@@ -122,10 +122,10 @@ def test_cad_initialization(client, mesh_path):
         return False
-def test_pose_estimation(client, query_image, query_name):
-    """Test pose estimation on a query image via API."""
     print("\n" + "=" * 60)
-    print("Test 3: Pose Estimation via API")
     print("=" * 60)
     print(f"Query image: {query_name}")
@@ -138,31 +138,75 @@ def test_pose_estimation(client, query_image, query_name):
     }
     try:
-        poses = client.estimate_pose(
-            object_id="t_shape",  # Changed to match CAD initialization
-            query_image=query_image,
-            camera_intrinsics=camera_intrinsics
         )
-        if poses and len(poses) > 0:
-            print(f"✓ Pose estimation completed successfully (detected {len(poses)} object(s))")
-            for i, pose in enumerate(poses):
-                print(f"\nDetected Object {i+1}:")
-                print(f"  Position: x={pose['position']['x']:.3f}, "
-                      f"y={pose['position']['y']:.3f}, "
-                      f"z={pose['position']['z']:.3f}")
-                print(f"  Orientation (quaternion): w={pose['orientation']['w']:.3f}, "
-                      f"x={pose['orientation']['x']:.3f}, "
-                      f"y={pose['orientation']['y']:.3f}, "
-                      f"z={pose['orientation']['z']:.3f}")
-                print(f"  Confidence: {pose['confidence']:.3f}")
-            return True
-        else:
-            print("⚠ Pose estimation returned no detections")
-            print("Note: This is expected if the object is not visible in the query image")
             return False
     except Exception as e:
         print(f"✗ Pose estimation failed with exception: {e}")
         import traceback
@@ -201,7 +245,7 @@ def main():
         print(f"✗ {e}")
         return
-    print(f"\n⚠ Note: API currently only supports RGB (depth support coming soon)")
     # Test 1: Initialize API client
     client = test_client_initialization()
@@ -219,8 +263,8 @@ def main():
         print("=" * 60)
         return
-    # Test 3: Estimate pose using RGB image
-    success = test_pose_estimation(client, rgb_image, "rgb_001.jpg")
     # Print final results
     print("\n" + "=" * 60)
@@ -229,16 +273,17 @@ def main():
     print("✓ API client initialization: PASSED")
     print("✓ CAD-based object initialization: PASSED")
     if success:
-        print("✓ Pose estimation with detection: PASSED")
         print("\n🎉 ALL TESTS PASSED")
     else:
-        print("⚠ Pose estimation: No detections (API working, no objects found)")
-        print("\n📊 API TESTS PASSED (2/3 core functions verified)")
-        print("\nNote: No detections may occur if:")
-        print("  - Camera intrinsics don't match the actual camera")
-        print("  - Depth information is not available")
-        print("  - Object segmentation mask is inaccurate")
-        print("  - Images don't match the CAD model closely")
     print("=" * 60)

         return False
+def test_pose_estimation(client, query_image, depth_image, query_name):
+    """Test pose estimation on a query image via API with depth and mask verification."""
     print("\n" + "=" * 60)
+    print("Test 3: Pose Estimation via API (with Depth & Mask)")
     print("=" * 60)
     print(f"Query image: {query_name}")
     }
     try:
+        # Save images to temp files for API upload
+        import tempfile
+        rgb_temp = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
+        depth_temp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
+        # Save RGB as JPEG
+        rgb_bgr = cv2.cvtColor(query_image, cv2.COLOR_RGB2BGR)
+        cv2.imwrite(rgb_temp.name, rgb_bgr, [cv2.IMWRITE_JPEG_QUALITY, 95])
+        # Save depth as 16-bit PNG (convert back from meters to mm)
+        depth_uint16 = (depth_image * 1000.0).astype(np.uint16)
+        cv2.imwrite(depth_temp.name, depth_uint16)
+        print(f"Calling API with RGB + Depth images...")
+        # Call Gradio API directly to get all outputs (text, viz, mask)
+        result = client.client.predict(
+            object_id="t_shape",
+            query_image=handle_file(rgb_temp.name),
+            depth_image=handle_file(depth_temp.name),
+            fx=camera_intrinsics["fx"],
+            fy=camera_intrinsics["fy"],
+            cx=camera_intrinsics["cx"],
+            cy=camera_intrinsics["cy"],
+            api_name="/gradio_estimate"
         )
+        # Clean up temp files
+        from pathlib import Path
+        Path(rgb_temp.name).unlink()
+        Path(depth_temp.name).unlink()
+        # Result should be tuple: (text_output, viz_image, mask_image)
+        if not isinstance(result, tuple) or len(result) != 3:
+            print(f"✗ Unexpected result format: {type(result)}, length={len(result) if isinstance(result, tuple) else 'N/A'}")
+            return False
+        text_output, viz_image, mask_image = result
+        print(f"\n✓ API returned 3 outputs as expected")
+        print(f"  - Text output: {len(text_output)} chars")
+        print(f"  - Viz image: {viz_image.shape if viz_image is not None else 'None'}")
+        print(f"  - Mask image: {mask_image.shape if mask_image is not None else 'None'}")
+        # Verify mask was generated
+        if mask_image is None:
+            print(f"✗ Mask was not returned (expected auto-generated mask)")
             return False
+        print(f"✓ Mask returned: shape={mask_image.shape}, dtype={mask_image.dtype}")
+        # Check text output for success/failure
+        if "Error" in text_output or "✗" in text_output:
+            print(f"✗ Estimation failed: {text_output[:200]}")
+            return False
+        # Check if poses were detected
+        if "No poses detected" in text_output or "⚠" in text_output:
+            print(f"⚠ No poses detected (API working, but no objects found)")
+            print(f"Output: {text_output[:300]}")
+            return False
+        # Success - parse output
+        print(f"✓ Pose estimation succeeded!")
+        print(f"\nEstimation output:")
+        print(text_output)
+        return True
     except Exception as e:
         print(f"✗ Pose estimation failed with exception: {e}")
         import traceback
         print(f"✗ {e}")
         return
+    print(f"\n✓ Loaded RGB and depth images - testing with both")
     # Test 1: Initialize API client
     client = test_client_initialization()
         print("=" * 60)
         return
+    # Test 3: Estimate pose using RGB + depth images
+    success = test_pose_estimation(client, rgb_image, depth_image, "rgb_001.jpg")
     # Print final results
     print("\n" + "=" * 60)
     print("✓ API client initialization: PASSED")
     print("✓ CAD-based object initialization: PASSED")
     if success:
+        print("✓ Pose estimation with RGB+depth: PASSED")
+        print("✓ Mask generation verification: PASSED")
         print("\n🎉 ALL TESTS PASSED")
     else:
+        print("⚠ Pose estimation: Issues detected (see output above)")
+        print("\n📊 API TESTS PARTIALLY PASSED (2/3 core functions verified)")
+        print("\nPossible reasons for no detections:")
+        print("  - Camera intrinsics mismatch")
+        print("  - Object not visible or occluded in image")
+        print("  - Depth data quality issues")
+        print("  - Mask segmentation inaccurate")
     print("=" * 60)