File size: 3,070 Bytes
bb78000
 
 
 
495db4c
bb78000
 
 
 
 
495db4c
bb78000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495db4c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import cv2
import torch
import numpy as np
import gradio as gr
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Setup Detectron2 model
cfg = get_cfg()
# Mask R-CNN (ResNet-50 + FPN, 3x schedule) pretrained on COCO instance segmentation.
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
))
# Discard detections scoring below 0.5 at inference time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Pretrained weights matching the config above (downloaded on first run).
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)
cfg.MODEL.DEVICE = "cpu"  # Ensure CPU for Hugging Face Spaces

predictor = DefaultPredictor(cfg)
# Dataset metadata (class names etc.) for the config's training set (COCO).
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

# Distance calculation helper
def calculate_pixel_distance(box1, box2):
    """Return the truncated-integer Euclidean distance (in pixels) between
    the centers of two boxes given as (x_min, y_min, x_max, y_max)."""
    center_a_x = (box1[0] + box1[2]) / 2
    center_a_y = (box1[1] + box1[3]) / 2
    center_b_x = (box2[0] + box2[2]) / 2
    center_b_y = (box2[1] + box2[3]) / 2
    delta = [center_b_x - center_a_x, center_b_y - center_a_y]
    return int(np.linalg.norm(delta))

def detect_objects(image):
    """Run instance segmentation on an uploaded image.

    Args:
        image: H x W x 3 uint8 numpy array in RGB channel order (Gradio's
            ``gr.Image(type="numpy")`` delivers RGB).

    Returns:
        Tuple of (annotated RGB image, list of [x1, y1, x2, y2] boxes,
        list of "index: class_name" label strings).
    """
    # DefaultPredictor expects BGR input (Detectron2's default INPUT.FORMAT),
    # so reverse the channel order for inference only.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    outputs = predictor(image_bgr)
    instances = outputs["instances"].to("cpu")
    boxes = instances.pred_boxes.tensor.numpy()
    classes = instances.pred_classes.numpy()
    # Hoist the metadata lookup out of the loop; COCO class names by index.
    thing_classes = metadata.get("thing_classes", [])
    class_names = [thing_classes[i] for i in classes]

    # Visualizer draws in RGB — give it the original (un-swapped) image,
    # otherwise the annotated output comes back with red/blue inverted.
    v = Visualizer(image, metadata, scale=1.0)
    out = v.draw_instance_predictions(instances)
    annotated = out.get_image()

    # Prepare object list with indices so dropdown labels map back to boxes.
    objects = [f"{i}: {name}" for i, name in enumerate(class_names)]
    return annotated, boxes.tolist(), objects

# Boxes from the most recent detection, kept at module level so the
# distance callback can look them up by dropdown index across Gradio calls.
global_boxes = []

def interface(image):
    """Detect objects in *image*, cache their boxes, and refresh both
    object-selection dropdowns with the new labels."""
    global global_boxes
    annotated_img, detected_boxes, option_labels = detect_objects(image)
    global_boxes = detected_boxes
    first_dropdown = gr.update(choices=option_labels, value=[])
    second_dropdown = gr.update(choices=option_labels, value=[])
    return annotated_img, first_dropdown, second_dropdown

def measure_distance(idx1, idx2):
    """Compute the pixel distance between two previously detected objects.

    Args:
        idx1, idx2: Dropdown selections formatted "index: class_name"
            (as produced by detect_objects), or None/empty if unselected.

    Returns:
        A human-readable result string, or an error message when a
        selection is missing, malformed, or stale.
    """
    # Dropdowns yield None before any selection — guard explicitly instead
    # of letting .split raise and get silently swallowed.
    if not idx1 or not idx2:
        return "Error in selection. Please try again."
    try:
        # Label prefix before ":" is the index into the cached box list.
        box1 = global_boxes[int(idx1.split(":")[0])]
        box2 = global_boxes[int(idx2.split(":")[0])]
    except (ValueError, IndexError):
        # ValueError: malformed label; IndexError: boxes from a stale image.
        # Narrowed from a bare `except Exception` that hid real bugs.
        return "Error in selection. Please try again."
    pixel_dist = calculate_pixel_distance(box1, box2)
    return f"Pixel distance: {pixel_dist}px"

# Gradio UI: image in/out on top, two object pickers, then the result row.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Detectron2 Object Detection + Distance Estimation")
    with gr.Row():
        input_img = gr.Image(type="numpy", label="Upload Image")
        output_img = gr.Image(type="numpy", label="Detected Image")
    with gr.Row():
        # Choices are populated dynamically by `interface` after detection.
        obj1 = gr.Dropdown(label="Select Object 1")
        obj2 = gr.Dropdown(label="Select Object 2")
    distance_btn = gr.Button("Calculate Distance")
    distance_output = gr.Textbox(label="Result")
    clear_btn = gr.Button("Clear")

    # Re-run detection whenever a new image is supplied.
    input_img.change(fn=interface, inputs=input_img, outputs=[output_img, obj1, obj2])
    distance_btn.click(fn=measure_distance, inputs=[obj1, obj2], outputs=distance_output)
    # Clear values must align positionally with the outputs list: images and
    # dropdowns reset to None, the result Textbox to "". (Previously "" landed
    # on obj1 and [] on obj2 while the Textbox was set to None.)
    clear_btn.click(
        lambda: [None, None, "", None, None],
        inputs=[],
        outputs=[input_img, output_img, distance_output, obj1, obj2],
    )

demo.launch()