Alessio Grancini
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -149,37 +149,57 @@ def get_camera_matrix(depth_estimator):
|
|
| 149 |
|
| 150 |
@spaces.GPU
|
| 151 |
def get_detection_data(image_data):
|
| 152 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
try:
|
| 154 |
-
#
|
| 155 |
if isinstance(image_data, dict):
|
| 156 |
-
image
|
|
|
|
|
|
|
| 157 |
else:
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
# Decode base64
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 169 |
-
except Exception as e:
|
| 170 |
-
return {"error": f"Base64 decoding failed: {str(e)}"}
|
| 171 |
|
| 172 |
-
# Process image
|
| 173 |
image = utils.resize(img)
|
| 174 |
image_segmentation, objects_data = img_seg.predict(image)
|
| 175 |
depthmap, depth_colormap = depth_estimator.make_prediction(image)
|
| 176 |
|
| 177 |
-
# Prepare structured response
|
| 178 |
processed_objects = []
|
| 179 |
for obj in objects_data:
|
| 180 |
cls_id, cls_name, center, mask, color = obj
|
| 181 |
-
depth_value = depth_at_center(
|
| 182 |
-
|
|
|
|
|
|
|
| 183 |
processed_objects.append({
|
| 184 |
"class_id": int(cls_id),
|
| 185 |
"class_name": cls_name,
|
|
@@ -188,7 +208,6 @@ def get_detection_data(image_data):
|
|
| 188 |
"color": [int(c) for c in color]
|
| 189 |
})
|
| 190 |
|
| 191 |
-
# Encode results
|
| 192 |
response = {
|
| 193 |
"detections": processed_objects,
|
| 194 |
"depth_map": encode_base64_image(depth_colormap),
|
|
@@ -200,7 +219,6 @@ def get_detection_data(image_data):
|
|
| 200 |
"cy": depth_estimator.cy_depth
|
| 201 |
}
|
| 202 |
}
|
| 203 |
-
|
| 204 |
return response
|
| 205 |
|
| 206 |
except Exception as e:
|
|
|
|
| 149 |
|
| 150 |
@spaces.GPU
|
| 151 |
def get_detection_data(image_data):
|
| 152 |
+
"""
|
| 153 |
+
Get structured detection data with depth information, using a nested JSON + Base64 image.
|
| 154 |
+
Expects Lens Studio to send:
|
| 155 |
+
{
|
| 156 |
+
"image": {
|
| 157 |
+
"image": {
|
| 158 |
+
"data": "data:image/png;base64,<BASE64>"
|
| 159 |
+
}
|
| 160 |
+
}
|
| 161 |
+
}
|
| 162 |
+
or just a direct string.
|
| 163 |
+
"""
|
| 164 |
try:
|
| 165 |
+
# 1) Extract the nested "data" string if it's a dict
|
| 166 |
if isinstance(image_data, dict):
|
| 167 |
+
# For the structure: {"image": {"image": {"data": "data:image/png;base64,..."}}}
|
| 168 |
+
nested_dict = image_data.get("image", {}).get("image", {})
|
| 169 |
+
full_data_url = nested_dict.get("data", "")
|
| 170 |
else:
|
| 171 |
+
# If not a dict, assume it's a direct string
|
| 172 |
+
full_data_url = image_data
|
| 173 |
+
|
| 174 |
+
if not full_data_url:
|
| 175 |
+
return {"error": "No base64 data found in input."}
|
| 176 |
|
| 177 |
+
# 2) Strip the "data:image/..." prefix if present
|
| 178 |
+
if full_data_url.startswith("data:image"):
|
| 179 |
+
# split once on comma => ["data:image/png;base64", "<BASE64>"]
|
| 180 |
+
_, b64_string = full_data_url.split(",", 1)
|
| 181 |
+
else:
|
| 182 |
+
b64_string = full_data_url
|
| 183 |
|
| 184 |
+
# 3) Decode base64 -> PIL -> OpenCV
|
| 185 |
+
img_data = base64.b64decode(b64_string)
|
| 186 |
+
img = Image.open(BytesIO(img_data))
|
| 187 |
+
img = np.array(img)
|
| 188 |
+
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
+
# 4) Process image
|
| 191 |
image = utils.resize(img)
|
| 192 |
image_segmentation, objects_data = img_seg.predict(image)
|
| 193 |
depthmap, depth_colormap = depth_estimator.make_prediction(image)
|
| 194 |
|
| 195 |
+
# 5) Prepare structured response
|
| 196 |
processed_objects = []
|
| 197 |
for obj in objects_data:
|
| 198 |
cls_id, cls_name, center, mask, color = obj
|
| 199 |
+
depth_value = depth_at_center(
|
| 200 |
+
depthmap,
|
| 201 |
+
[center[0] - 10, center[1] - 10, center[0] + 10, center[1] + 10]
|
| 202 |
+
)
|
| 203 |
processed_objects.append({
|
| 204 |
"class_id": int(cls_id),
|
| 205 |
"class_name": cls_name,
|
|
|
|
| 208 |
"color": [int(c) for c in color]
|
| 209 |
})
|
| 210 |
|
|
|
|
| 211 |
response = {
|
| 212 |
"detections": processed_objects,
|
| 213 |
"depth_map": encode_base64_image(depth_colormap),
|
|
|
|
| 219 |
"cy": depth_estimator.cy_depth
|
| 220 |
}
|
| 221 |
}
|
|
|
|
| 222 |
return response
|
| 223 |
|
| 224 |
except Exception as e:
|