"""Gradio demo: Detectron2 instance segmentation plus pixel distance between detections.

Upload an image, pick two detected objects from the dropdowns, and the app
reports the Euclidean distance (in pixels) between their bounding-box centers.
"""

import os

import cv2
import torch
import numpy as np
import gradio as gr
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Model-zoo entry used for both the config file and the pretrained weights.
MODEL_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

# Setup Detectron2 model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_CONFIG))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_CONFIG)
cfg.MODEL.DEVICE = "cpu"  # Ensure CPU for Hugging Face Spaces

predictor = DefaultPredictor(cfg)
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])


# Distance calculation helper
def calculate_pixel_distance(box1, box2):
    """Return the truncated Euclidean distance between two box centers.

    Args:
        box1: Sequence ``(x_min, y_min, x_max, y_max)`` of the first box.
        box2: Sequence ``(x_min, y_min, x_max, y_max)`` of the second box.

    Returns:
        The distance between the two box centers in pixels, truncated to int.
    """
    x1, y1 = (box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2
    x2, y2 = (box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2
    return int(np.linalg.norm([x2 - x1, y2 - y1]))


def detect_objects(image):
    """Run the predictor on an image and draw the detections.

    Args:
        image: ``H x W x 3`` array as delivered by ``gr.Image(type="numpy")``,
            i.e. in RGB channel order.

    Returns:
        A tuple ``(annotated, boxes, objects)`` where ``annotated`` is the
        image with predictions drawn on it, ``boxes`` is a list of
        ``[x_min, y_min, x_max, y_max]`` lists, and ``objects`` is a list of
        ``"<index>: <class name>"`` labels aligned with ``boxes``.
    """
    # DefaultPredictor takes BGR input, while Visualizer takes RGB. Gradio
    # hands us RGB, so convert only the copy that goes to the predictor;
    # feeding the converted array to the Visualizer would swap red and blue
    # in the annotated output.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    outputs = predictor(image_bgr)
    instances = outputs["instances"].to("cpu")

    boxes = instances.pred_boxes.tensor.numpy()
    classes = instances.pred_classes.numpy()

    # Look the class-name table up once instead of once per detection.
    thing_classes = metadata.get("thing_classes", [])
    class_names = [
        thing_classes[i] if i < len(thing_classes) else f"class_{i}"
        for i in classes
    ]

    v = Visualizer(image, metadata, scale=1.0)
    out = v.draw_instance_predictions(instances)
    annotated = out.get_image()

    # Prepare object list with indices
    objects = [f"{i}: {name}" for i, name in enumerate(class_names)]
    return annotated, boxes.tolist(), objects


# Store detected boxes across calls.
# NOTE: module-level state is shared by every session of the app.
global_boxes = []


def _reset_boxes():
    """Forget the boxes of the previous detection run."""
    global global_boxes
    global_boxes = []


def interface(image):
    """Handle an image change: detect objects and refresh both dropdowns.

    Args:
        image: RGB numpy image from the upload widget, or ``None`` when the
            widget has been cleared.

    Returns:
        ``(annotated_image, obj1_update, obj2_update)`` for the Gradio outputs.
    """
    global global_boxes

    # The change event also fires when the image is cleared; there is nothing
    # to detect then, so just reset the dependent widgets and stored boxes.
    if image is None:
        global_boxes = []
        empty = gr.update(choices=[], value=None)
        return None, empty, empty

    annotated, boxes, labels = detect_objects(image)
    global_boxes = boxes
    # Single-select dropdowns are reset with None rather than an empty list.
    return (
        annotated,
        gr.update(choices=labels, value=None),
        gr.update(choices=labels, value=None),
    )


def measure_distance(idx1, idx2):
    """Report the pixel distance between the two selected detections.

    Args:
        idx1: Dropdown label of the form ``"<index>: <class name>"``.
        idx2: Dropdown label of the form ``"<index>: <class name>"``.

    Returns:
        A human-readable result string, or an error message when either
        selection is missing, malformed, or refers to an unknown detection.
    """
    try:
        box1 = global_boxes[int(idx1.split(":")[0])]
        box2 = global_boxes[int(idx2.split(":")[0])]
    except (AttributeError, ValueError, IndexError, TypeError):
        # None selection, unparsable label, or an index with no stored box.
        return "Error in selection. Please try again."

    pixel_dist = calculate_pixel_distance(box1, box2)
    return f"Pixel distance: {pixel_dist}px"


def _clear_all():
    """Reset every widget; values are listed in the order of the click outputs."""
    _reset_boxes()
    return [None, None, "", gr.update(choices=[], value=None), gr.update(choices=[], value=None)]


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Detectron2 Object Detection + Distance Estimation")

    with gr.Row():
        input_img = gr.Image(type="numpy", label="Upload Image")
        output_img = gr.Image(type="numpy", label="Detected Image")

    with gr.Row():
        obj1 = gr.Dropdown(label="Select Object 1")
        obj2 = gr.Dropdown(label="Select Object 2")

    distance_btn = gr.Button("Calculate Distance")
    distance_output = gr.Textbox(label="Result")
    clear_btn = gr.Button("Clear")

    input_img.change(fn=interface, inputs=input_img, outputs=[output_img, obj1, obj2])
    distance_btn.click(fn=measure_distance, inputs=[obj1, obj2], outputs=distance_output)
    clear_btn.click(
        fn=_clear_all,
        inputs=[],
        outputs=[input_img, output_img, distance_output, obj1, obj2],
    )

demo.launch()