File size: 3,830 Bytes
3955743
 
 
 
 
 
 
 
 
9decefc
 
3955743
9decefc
 
 
 
3955743
9decefc
 
 
 
3955743
 
9decefc
 
 
3955743
9decefc
 
3955743
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9decefc
3955743
 
 
 
9decefc
3955743
 
 
 
 
9decefc
 
3955743
 
9decefc
3955743
 
9decefc
 
3955743
9decefc
3955743
9decefc
 
 
3955743
 
 
 
9decefc
3955743
 
 
 
 
 
 
9decefc
3955743
 
 
 
 
 
 
 
 
9decefc
3955743
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr
import torch
from torchvision.ops import nms
from PIL import Image, ImageDraw

from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Use the GPU when torch reports one as available; otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Identifiers handed to ``from_pretrained`` for each of the two detectors.
YOLOS_REPO_ID = "adcelis/yolo_finetuned_raccoon"
DETR_REPO_ID  = "adcelis/detr_finetuned_raccoon"


def _load_detector(repo_id):
    """Load the image processor and detection model for ``repo_id``.

    The model is moved to ``DEVICE`` and switched to eval mode before it is
    returned.

    Returns:
        A ``(processor, model)`` pair.
    """
    processor = AutoImageProcessor.from_pretrained(repo_id)
    model = AutoModelForObjectDetection.from_pretrained(repo_id)
    model = model.to(DEVICE)
    model.eval()
    return processor, model


# Load YOLOS
proc_yolos, model_yolos = _load_detector(YOLOS_REPO_ID)

# Load DETR
proc_detr, model_detr = _load_detector(DETR_REPO_ID)

@torch.no_grad()
def predict_tf(pil_img, processor, model, conf):
    """Run one detector on a single image and return its detections on the CPU.

    Args:
        pil_img: Image to analyse; ``pil_img.size`` is read as (width, height).
        processor: Object that builds the model inputs and exposes
            ``post_process_object_detection``.
        model: Detection model called with the processed inputs.
        conf: Value forwarded as ``threshold`` to the post-processing step.

    Returns:
        A ``(boxes, scores, labels)`` tuple of tensors moved to the CPU.
    """
    width, height = pil_img.size

    batch = processor(images=[pil_img], return_tensors="pt")
    batch = batch.to(DEVICE)
    raw_outputs = model(**batch)

    # One (height, width) row per image so boxes are rescaled to the original size.
    image_sizes = torch.tensor([[height, width]], device=DEVICE)
    detections = processor.post_process_object_detection(
        raw_outputs,
        threshold=conf,
        target_sizes=image_sizes,
    )

    # Only one image was submitted, so only the first result is relevant.
    first = detections[0]
    return first["boxes"].cpu(), first["scores"].cpu(), first["labels"].cpu()

def ensemble_union_nms(boxes1, scores1, labels1, boxes2, scores2, labels2,
                       w2=0.8, iou_thr=0.5, score_thr=0.25):
    """Merge two detection sets and de-duplicate them with per-class NMS.

    Args:
        boxes1, scores1, labels1: Detections of the first model.
        boxes2, scores2, labels2: Detections of the second model.
        w2: Multiplier applied to the second model's scores before merging.
        iou_thr: IoU threshold passed to ``nms``.
        score_thr: Minimum (already weighted) score a detection needs to be kept.

    Returns:
        A ``(boxes, scores, labels)`` tuple sorted by descending score. When
        nothing reaches ``score_thr`` the filtered (empty) tensors are returned
        without running NMS.
    """
    all_boxes = torch.cat((boxes1, boxes2), dim=0)
    all_scores = torch.cat((scores1, scores2 * w2), dim=0)
    all_labels = torch.cat((labels1, labels2), dim=0)

    # Discard low-confidence detections before the suppression step.
    confident = all_scores >= score_thr
    all_boxes = all_boxes[confident]
    all_scores = all_scores[confident]
    all_labels = all_labels[confident]
    if all_boxes.numel() == 0:
        return all_boxes, all_scores, all_labels

    # Suppress overlaps only among detections that share a class label.
    survivors = []
    for class_id in all_labels.unique():
        members = torch.nonzero(all_labels == class_id, as_tuple=True)[0]
        winners = nms(all_boxes[members], all_scores[members], iou_thr)
        survivors.append(members[winners])

    selected = torch.cat(survivors)
    ranking = all_scores[selected].argsort(descending=True)
    selected = selected[ranking]
    return all_boxes[selected], all_scores[selected], all_labels[selected]

def draw_boxes(pil_img, boxes, scores, labels, id2label):
    """Return a copy of ``pil_img`` with each detection outlined and captioned.

    Args:
        pil_img: Source image; it is copied, not modified.
        boxes: Iterable of boxes, each convertible to ``[x1, y1, x2, y2]``.
        scores: Per-box scores, printed with two decimals.
        labels: Per-box class ids.
        id2label: Mapping from class id to display name; an unknown id is
            rendered as its number.

    Returns:
        The annotated copy of the image.
    """
    canvas = pil_img.copy()
    painter = ImageDraw.Draw(canvas)

    for box, score, label in zip(boxes, scores, labels):
        left, top, right, bottom = map(float, box.tolist())
        painter.rectangle((left, top, right, bottom), outline="green", width=2)

        class_id = int(label)
        caption = f"{id2label.get(class_id, str(class_id))} {float(score):.2f}"
        # The caption is anchored at the box's top-left corner.
        painter.text((left, top), caption, fill="black")

    return canvas

def run(pil_img, yolos_conf, detr_conf, w2, iou_thr, score_thr):
    """Detect objects with both models, ensemble the results and render them.

    Args:
        pil_img: Input image, or ``None`` when no image was supplied.
        yolos_conf: Confidence threshold used for the YOLOS predictions.
        detr_conf: Confidence threshold used for the DETR predictions.
        w2: Weight applied to the DETR scores inside the ensemble.
        iou_thr: IoU threshold for the NMS step.
        score_thr: Minimum weighted score kept after merging.

    Returns:
        ``(annotated_image, rows)`` where each row is
        ``[label, score, x1, y1, x2, y2]``. When ``pil_img`` is ``None`` the
        result is ``(None, [])``.
    """
    # Submitting the form without an image yields None; return an empty
    # result instead of failing on ``None.convert``.
    if pil_img is None:
        return None, []

    pil_img = pil_img.convert("RGB")

    b1, s1, l1 = predict_tf(pil_img, proc_yolos, model_yolos, yolos_conf)
    b2, s2, l2 = predict_tf(pil_img, proc_detr,  model_detr,  detr_conf)

    be, se, le = ensemble_union_nms(
        b1, s1, l1, b2, s2, l2, w2=w2, iou_thr=iou_thr, score_thr=score_thr
    )

    # id2label (same class in both models: raccoon)
    id2label = model_yolos.config.id2label
    out_img = draw_boxes(pil_img, be, se, le, id2label)

    # One table row per surviving detection: name, score, then box corners.
    rows = [
        [
            id2label.get(int(label), str(int(label))),
            round(float(score), 3),
            *(round(float(coord), 2) for coord in box.tolist()),
        ]
        for box, score, label in zip(be, se, le)
    ]

    return out_img, rows

# Input widgets, in the same order as the positional parameters of ``run``.
_demo_inputs = [
    gr.Image(type="pil", label="Imagen"),
    gr.Slider(0.05, 0.9, value=0.5, step=0.05, label="YOLOS conf"),
    gr.Slider(0.05, 0.9, value=0.5, step=0.05, label="DETR conf"),
    gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="Peso DETR (w2)"),
    gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="NMS IoU"),
    gr.Slider(0.05, 0.9, value=0.25, step=0.05, label="Score mínimo (post-ensemble)"),
]

# Output widgets: the annotated image followed by the detections table.
_demo_outputs = [
    gr.Image(type="pil", label="Ensemble (NMS)"),
    gr.Dataframe(headers=["label", "score", "x1", "y1", "x2", "y2"], label="Detecciones"),
]

demo = gr.Interface(
    fn=run,
    inputs=_demo_inputs,
    outputs=_demo_outputs,
    title="Ensemble YOLOS + DETR con Non-Maximum Suppression",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()