dxcanh commited on
Commit
94cfd9f
·
verified ·
1 Parent(s): 198d582

Upload test.py

Browse files
Files changed (1) hide show
  1. test.py +211 -0
test.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+
6
def extract_all(image: np.ndarray, area_threshold: int = 100, lower_thresh: int = 100, upper_thresh: int = 200) -> dict:
    """Segment an image into connected "islands" using Canny edges + morphology.

    Args:
        image: BGR color or single-channel grayscale image.
        area_threshold: minimum contour area (in pixels) for a region to be kept.
        lower_thresh: lower Canny hysteresis threshold.
        upper_thresh: upper Canny hysteresis threshold.

    Returns:
        Dict mapping the first (row, col) pixel of each kept region to the
        list of all (row, col) pixels inside that region's filled contour.
    """
    # Work on a single-channel copy.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, lower_thresh, upper_thresh)

    # Close gaps in the edge map so each object becomes one solid blob:
    # close -> dilate -> close, with progressively chosen kernels.
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=3)
    closed_edges = cv2.dilate(closed_edges, np.ones((5, 5), np.uint8), iterations=1)
    closed_edges = cv2.morphologyEx(closed_edges, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=2)

    # NOTE: the original wrote a debug image ("canny_binary.jpg") to the
    # working directory here on every call; removed as a leftover debug
    # side effect unsafe for a served app.

    contours, _ = cv2.findContours(closed_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    real_islands = {}
    for contour in contours:
        if cv2.contourArea(contour) > area_threshold:
            # Rasterize the filled contour and collect its pixel coordinates.
            mask = np.zeros_like(gray)
            cv2.drawContours(mask, [contour], -1, 255, thickness=cv2.FILLED)
            pixels = list(zip(*np.where(mask == 255)))
            real_islands[(pixels[0][0], pixels[0][1])] = pixels

    print(f"Detected {len(real_islands)} islands from {len(contours)} contours")
    return real_islands
38
+
39
def extract_object(image: np.ndarray, island: list[tuple]) -> np.ndarray:
    """Crop one island's pixels into a tight bounding-box image.

    Args:
        image: source image, (H, W, C) color or (H, W) grayscale.
        island: list of (row, col) pixel coordinates belonging to the island.

    Returns:
        uint8 array of shape (h, w, C) — C == 1 for grayscale input — holding
        the island's pixels; positions inside the box but outside the island
        stay zero (black).
    """
    coords = np.array(island)
    min_y, min_x = coords.min(axis=0)
    max_y, max_x = coords.max(axis=0)

    height, width = max_y - min_y + 1, max_x - min_x + 1
    num_channels = image.shape[2] if len(image.shape) == 3 else 1
    result = np.zeros((height, width, num_channels), dtype=np.uint8)

    values = image[coords[:, 0], coords[:, 1]]
    if values.ndim == 1:
        # Grayscale source: add a channel axis so the (n,) values broadcast
        # into the (n, 1) destination (the original assignment raised a
        # broadcasting ValueError here for islands with more than one pixel).
        values = values[:, None]
    result[coords[:, 0] - min_y, coords[:, 1] - min_x] = values

    return result
52
+
53
def draw_bound(img: np.ndarray, top: int, down: int, left: int, right: int, size: int, color=(0, 255, 0)) -> np.ndarray:
    """Return a copy of `img` with a rectangular frame of thickness `size`.

    The frame spans rows [top, down] and columns [left, right] and is drawn
    as four filled bands (top, bottom, left, right edges).
    """
    out = img.copy()
    bands = (
        ((left, top), (right, top + size)),        # top edge
        ((left, down - size), (right, down)),      # bottom edge
        ((left, top), (left + size, down)),        # left edge
        ((right - size, top), (right, down)),      # right edge
    )
    for corner_a, corner_b in bands:
        cv2.rectangle(out, corner_a, corner_b, color, thickness=-1)
    return out
60
+
61
def compute_template_matching(img: np.ndarray, template: np.ndarray, method, mask: np.ndarray):
    """Run a masked template match of `template` over `img`.

    Args:
        img: search image (cast to uint8).
        template: patch to locate (cast to uint8).
        method: an OpenCV matching method (e.g. cv2.TM_CCOEFF_NORMED).
        mask: per-pixel template mask passed through to cv2.matchTemplate.

    Returns:
        The matching score map with all non-finite entries zeroed.

    Raises:
        ValueError: if the image or template is constant (zero standard
        deviation), where normalized match scores are undefined.
    """
    n_img = img.astype(np.uint8)
    n_template = template.astype(np.uint8)

    if np.std(n_template) == 0:
        raise ValueError("template has zero standard deviation; cannot match")
    if np.std(n_img) == 0:
        raise ValueError("image has zero standard deviation; cannot match")

    result = cv2.matchTemplate(n_img, n_template, method, mask=mask)
    # Masked normalized matching can produce inf AND NaN where the masked
    # patch has no variance; zero everything non-finite. (The original only
    # filtered inf with np.isinf, letting NaN reach cv2.minMaxLoc.)
    result = np.where(np.isfinite(result), result, 0)

    return result
74
+
75
def process_single_object_loop(img: np.ndarray, template: np.ndarray, method, mask: np.ndarray):
    """Match `template` in `img` once and annotate the best hit.

    Returns:
        (best_score, score_map, annotated_image, (row, col)) where
        (row, col) is the top-left corner of the best-match box.
    """
    score_map = compute_template_matching(img, template, method, mask)
    _, best_score, _, best_xy = cv2.minMaxLoc(score_map)

    col, row = best_xy  # minMaxLoc reports (x, y); convert to (row, col)
    t_height = template.shape[0]
    t_width = template.shape[1]
    annotated = draw_bound(img, row, row + t_height, col, col + t_width, 8, (0, 255, 0))

    return best_score, score_map, annotated, (row, col)
91
+
92
def process_template_at_scale(source: np.ndarray, template: np.ndarray, method, scale: float):
    """Match `template` against `source` at one scale factor.

    Builds a binary mask of the template's non-black pixels (median blur and
    erosion suppress stray edge pixels first), resizes mask and template by
    `scale`, then runs a masked template match.

    Args:
        source: search image.
        template: object crop to locate.
        method: OpenCV matching method.
        scale: resize factor applied to the template/mask.

    Returns:
        (best_score, score_map, annotated_source, masked_template, (row, col)).
    """
    masked_template = template.copy().astype(np.uint8)
    temp = cv2.medianBlur(masked_template.copy(), 5)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    temp = cv2.erode(temp, kernel, iterations=1)
    _, mask = cv2.threshold(temp, 1, 255, cv2.THRESH_BINARY)

    # Clamp the target size to at least 1x1: for tiny templates at small
    # scales, int(dim * scale) can reach 0 and cv2.resize would throw.
    new_w = max(1, int(mask.shape[1] * scale))
    new_h = max(1, int(mask.shape[0] * scale))
    mask = cv2.resize(mask, (new_w, new_h), interpolation=cv2.INTER_NEAREST_EXACT)
    masked_template = cv2.resize(masked_template, (mask.shape[1], mask.shape[0]), interpolation=cv2.INTER_NEAREST_EXACT)

    # NOTE(review): cv2.matchTemplate still raises if the (scaled) template
    # is larger than `source` — confirm upstream sizes if that can occur.
    local_max, result, bound_image, pos = process_single_object_loop(source.copy(), masked_template, method, mask.astype(np.uint8))

    # Keep only the masked-in pixels of the template for the caller.
    max_template = np.zeros_like(masked_template)
    max_template[mask.astype(bool)] = masked_template[mask.astype(bool)]

    return local_max, result, bound_image, max_template, pos
108
+
109
def process_images(source_img, objects_img, confidence_threshold=0.7):
    """Detect each object from `objects_img` inside `source_img`.

    Pipeline: segment `objects_img` into islands, crop each island as a
    template, then template-match every template against `source_img` over a
    range of scales, drawing a numbered bounding box for each match whose
    confidence reaches `confidence_threshold`.

    Args:
        source_img: image to search in (ndarray, or PIL-like convertible).
        objects_img: image containing the objects to find.
        confidence_threshold: minimum TM_CCOEFF_NORMED score to accept a match.

    Returns:
        Copy of the source image with green bounding boxes and labels drawn
        on accepted matches.
    """
    # NOTE(review): Gradio with type="numpy" passes an RGB ndarray, so the
    # else-branch (explicit RGB -> BGR) only runs for non-ndarray inputs;
    # ndarray inputs are used as-is — confirm intended channel order.
    if isinstance(source_img, np.ndarray):
        source = source_img
    else:
        source = np.array(source_img)[:, :, ::-1]  # RGB -> BGR

    if isinstance(objects_img, np.ndarray):
        objects = objects_img
    else:
        objects = np.array(objects_img)[:, :, ::-1]  # RGB -> BGR

    # Denoise before segmentation; crops are still taken from the original.
    object_img = cv2.medianBlur(objects.copy(), 3)
    islands = extract_all(object_img, area_threshold=100, lower_thresh=100, upper_thresh=200)
    objects_extracted = []
    for island in islands.values():
        object_image = extract_object(objects, island)
        objects_extracted.append(object_image)

    result_image = source.copy()
    method = cv2.TM_CCOEFF_NORMED

    print("\nProcessing object detection...")
    print(f"Confidence threshold: {confidence_threshold}")
    print(f"Total objects to detect: {len(objects_extracted)}\n")

    for i, template in enumerate(objects_extracted):
        print(f"\nProcessing object {i+1}/{len(objects_extracted)}")
        max_val = 0
        max_pos = None
        max_template = None

        # Try 20 scales from 1/4 size up to full size; stop early once the
        # best confidence so far reaches the threshold.
        scale_steps = np.linspace(0.25, 1.0, 20)
        for scale in scale_steps:
            local_max, _, temp_bound_image, local_template, pos = process_template_at_scale(
                source, template, method, scale
            )
            print(f"Scale {scale:.2f}: Confidence = {local_max:.4f}")

            if local_max > max_val:
                max_val = local_max
                max_template = local_template
                max_pos = pos

            if max_val >= confidence_threshold:
                print(f"Stopping at scale {scale:.2f} as confidence {max_val:.4f} >= threshold")
                break

        print(f"Final confidence for object {i+1}: {max_val:.4f}")
        if max_pos is not None and max_val >= confidence_threshold:
            # Draw the box at the best match position using the scaled
            # template's size, then label it with the object's 1-based index.
            h, w = max_template.shape[:2]
            result_image = draw_bound(
                result_image,
                max_pos[0],
                max_pos[0] + h,
                max_pos[1],
                max_pos[1] + w,
                8,
                (0, 255, 0)
            )
            cv2.putText(
                result_image,
                f"{i+1}",
                (max_pos[1], max_pos[0]-10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (0, 255, 0),
                2
            )
            print(f"Object {i+1} detected at position ({max_pos[0]}, {max_pos[1]}) with size ({h}x{w})")
        else:
            print(f"Object {i+1} not detected (confidence {max_val:.4f} < threshold {confidence_threshold})")

    print("\nDetection completed!")
    return result_image
183
+
184
# Gradio UI: two image uploads plus a confidence slider wired to process_images.
with gr.Blocks(title="Object Detection in Images") as demo:
    gr.Markdown("# Object Detection in Images")
    gr.Markdown("Upload a source image and an objects image to detect and draw bounding boxes around matching objects.")

    with gr.Row():
        with gr.Column():
            src_image = gr.Image(label="Source Image", type="numpy")
            obj_image = gr.Image(label="Objects Image", type="numpy")
            conf_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, step=0.01,
                label="Confidence Threshold",
            )
            run_button = gr.Button("Detect Objects")

        with gr.Column():
            result_view = gr.Image(label="Result with Bounding Boxes", type="numpy")

    run_button.click(
        fn=process_images,
        inputs=[src_image, obj_image, conf_slider],
        outputs=result_view,
    )

demo.launch()