Spaces:

Firoj112
/

WebAgents_

Paused

App Files Files Community

Firoj112 committed on May 5, 2025

Commit

7a89a47

verified ·

1 Parent(s): 3064cc7

Create detect_elements.py

Browse files

Files changed (1) hide show

tools/detect_elements.py +69 -0

tools/detect_elements.py ADDED Viewed

	@@ -0,0 +1,69 @@

import json
import os

import cv2
import numpy as np
from smolagents.tools import Tool
def detect_elements(screenshot_path, element_type="table"):
    """Detect table-like regions or text boxes in a screenshot using OpenCV.

    The image is converted to grayscale, blurred, and passed through Canny
    edge detection. The bounding rectangle of every external contour is then
    kept or discarded based on its area and width/height ratio:

    * ``'table'``   -- area > 10000 and 0.5 < aspect ratio < 2.0
    * ``'textbox'`` -- area > 500 and aspect ratio > 2.0

    When at least one element matches, the boxes are drawn on a copy of the
    image, which is saved next to the input as ``<name>_detected<ext>``.

    Args:
        screenshot_path (str): Path to the screenshot.
        element_type (str): Type of element to detect, ``'table'`` or
            ``'textbox'`` (default: ``'table'``).

    Returns:
        str: On success, a JSON object with ``"detections"`` (a list of
        ``{"type", "bbox": [x, y, w, h]}`` entries) and ``"output_image"``
        (path of the annotated copy). Otherwise a plain-text message:
        the screenshot is missing, the element type is unsupported, the
        image cannot be read or written, nothing was detected, or an
        unexpected error occurred. The function never raises.
    """
    try:
        if not os.path.exists(screenshot_path):
            return f"Screenshot not found: {screenshot_path}"
        # Reject unknown types up front instead of running the whole pipeline
        # only to report that nothing was found.
        if element_type not in ("table", "textbox"):
            return (
                f"Unsupported element_type: {element_type!r} "
                "(expected 'table' or 'textbox')"
            )

        # cv2.imread signals an unreadable/unsupported file by returning None
        # rather than raising.
        image = cv2.imread(screenshot_path)
        if image is None:
            return f"Failed to read image: {screenshot_path}"

        # Preprocess: grayscale -> blur (noise reduction) -> edge map.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)

        contours, _ = cv2.findContours(
            edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        detections = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            area = w * h
            aspect_ratio = w / h if h > 0 else 0
            if element_type == "table":
                # Tables: large and roughly square-ish.
                matched = area > 10000 and 0.5 < aspect_ratio < 2.0
            else:
                # Text boxes: smaller but clearly wider than tall.
                matched = area > 500 and aspect_ratio > 2.0
            if matched:
                # Plain ints keep the result JSON-serialisable.
                detections.append(
                    {"type": element_type, "bbox": [int(x), int(y), int(w), int(h)]}
                )

        if not detections:
            return "No elements detected"

        # Derive the output name from the real extension so that non-PNG
        # inputs are never overwritten and directory names are left alone.
        root, ext = os.path.splitext(screenshot_path)
        output_path = f"{root}_detected{ext or '.png'}"

        # Colours are in OpenCV's BGR order: green for tables, red otherwise.
        color = (0, 255, 0) if element_type == "table" else (0, 0, 255)
        output_image = image.copy()
        for detection in detections:
            x, y, w, h = detection["bbox"]
            cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
        if not cv2.imwrite(output_path, output_image):
            return f"Failed to write annotated image: {output_path}"

        return json.dumps({
            "detections": detections,
            "output_image": output_path,
        })
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Failed to detect elements: {e}"
# Wrap detect_elements in a smolagents Tool object so it can be handed to an
# agent under the name "detect_elements".
# NOTE(review): confirm that the installed smolagents version accepts these
# constructor keywords (in particular `function=`) and the "str" type name.
tool = Tool(
    name="detect_elements",
    description="Detects table-like structures or text boxes in a screenshot using OpenCV.",
    inputs=dict(
        screenshot_path=dict(type="str", description="Path to the screenshot"),
        element_type=dict(
            type="str",
            default="table",
            description="Type: 'table' or 'textbox'",
        ),
    ),
    output_type="str",
    function=detect_elements,
)