import gradio as gr
import torch
from torchvision.ops import nms
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Identifiers handed to from_pretrained() for the two detectors
# (presumably Hugging Face Hub repository IDs — confirm).
YOLOS_REPO_ID = "adcelis/yolo_finetuned_raccoon"
DETR_REPO_ID = "adcelis/detr_finetuned_raccoon"
# Load YOLOS: image processor plus detection model, moved to DEVICE.
proc_yolos = AutoImageProcessor.from_pretrained(YOLOS_REPO_ID)
model_yolos = AutoModelForObjectDetection.from_pretrained(YOLOS_REPO_ID).to(DEVICE)
# Put the model in evaluation mode; it is only used for inference here.
model_yolos.eval()
# Load DETR: image processor plus detection model, moved to DEVICE.
proc_detr = AutoImageProcessor.from_pretrained(DETR_REPO_ID)
model_detr = AutoModelForObjectDetection.from_pretrained(DETR_REPO_ID).to(DEVICE)
# Put the model in evaluation mode; it is only used for inference here.
model_detr.eval()
@torch.no_grad()
def predict_tf(pil_img, processor, model, conf):
    """Run one detector on a single image and return its detections on CPU.

    Args:
        pil_img: Input image; its ``size`` attribute is read as
            ``(width, height)``.
        processor: Image processor used both to build the model inputs and to
            post-process the raw outputs.
        model: Detection model called with the processed inputs.
        conf: Score threshold forwarded to the post-processing step.

    Returns:
        A ``(boxes, scores, labels)`` tuple of tensors moved to the CPU.
    """
    width, height = pil_img.size
    batch = processor(images=[pil_img], return_tensors="pt").to(DEVICE)
    raw_outputs = model(**batch)
    # Post-processing receives the original image size as (height, width).
    sizes = torch.tensor([[height, width]], device=DEVICE)
    detections = processor.post_process_object_detection(
        raw_outputs, threshold=conf, target_sizes=sizes
    )
    # A single image was submitted, so only the first result is relevant.
    first = detections[0]
    return tuple(first[key].cpu() for key in ("boxes", "scores", "labels"))
def ensemble_union_nms(boxes1, scores1, labels1, boxes2, scores2, labels2,
                       w2=0.8, iou_thr=0.5, score_thr=0.25):
    """Merge two detection sets and de-duplicate them with per-class NMS.

    The detections of both models are concatenated, the scores of the second
    model are multiplied by ``w2``, detections scoring below ``score_thr`` are
    discarded, and non-maximum suppression is then applied separately to each
    label value.

    Args:
        boxes1, scores1, labels1: Detections of the first model.
        boxes2, scores2, labels2: Detections of the second model.
        w2: Multiplier applied to the second model's scores.
        iou_thr: IoU threshold handed to ``nms``.
        score_thr: Minimum (already weighted) score a detection must reach.

    Returns:
        ``(boxes, scores, labels)`` of the surviving detections, ordered by
        descending score. If nothing passes ``score_thr`` the (empty) filtered
        tensors are returned as they are.
    """
    all_boxes = torch.cat((boxes1, boxes2), dim=0)
    all_scores = torch.cat((scores1, scores2 * w2), dim=0)
    all_labels = torch.cat((labels1, labels2), dim=0)

    # Drop low-confidence detections before running NMS.
    confident = all_scores >= score_thr
    all_boxes = all_boxes[confident]
    all_scores = all_scores[confident]
    all_labels = all_labels[confident]
    if all_boxes.numel() == 0:
        return all_boxes, all_scores, all_labels

    # NMS is run per label so boxes of different classes never suppress
    # each other; indices are mapped back into the merged tensors.
    survivors = []
    for class_id in all_labels.unique():
        members = torch.where(all_labels == class_id)[0]
        kept = nms(all_boxes[members], all_scores[members], iou_thr)
        survivors.append(members[kept])

    order = torch.cat(survivors)
    ranking = all_scores[order].argsort(descending=True)
    order = order[ranking]
    return all_boxes[order], all_scores[order], all_labels[order]
def draw_boxes(pil_img, boxes, scores, labels, id2label):
    """Return a copy of ``pil_img`` with the detections drawn on it.

    Each detection is drawn as a green rectangle, with a black caption
    "<name> <score>" placed at the rectangle's first corner. The input image
    itself is not modified.

    Args:
        pil_img: Image to annotate; only a copy is drawn on.
        boxes: Iterable of boxes, each yielding four coordinates
            ``x1, y1, x2, y2`` via ``tolist()``.
        scores: Iterable of scores aligned with ``boxes``.
        labels: Iterable of label ids aligned with ``boxes``.
        id2label: Mapping from integer label id to display name; ids missing
            from the mapping are shown as the number itself.

    Returns:
        The annotated copy of the image.
    """
    canvas = pil_img.copy()
    pen = ImageDraw.Draw(canvas)
    for box, score, label in zip(boxes, scores, labels):
        left, top, right, bottom = (float(v) for v in box.tolist())
        pen.rectangle((left, top, right, bottom), outline="green", width=2)
        class_id = int(label)
        caption = id2label.get(class_id, str(class_id))
        pen.text((left, top), f"{caption} {float(score):.2f}", fill="black")
    return canvas
def run(pil_img, yolos_conf, detr_conf, w2, iou_thr, score_thr):
    """Gradio callback: detect with both models, ensemble, and render.

    Args:
        pil_img: Input image, or ``None`` when no image was supplied.
        yolos_conf: Score threshold for the YOLOS detector.
        detr_conf: Score threshold for the DETR detector.
        w2: Multiplier applied to the DETR scores in the ensemble.
        iou_thr: IoU threshold used by the ensemble's NMS.
        score_thr: Minimum weighted score kept by the ensemble.

    Returns:
        ``(image, rows)`` where ``image`` is the annotated copy of the input
        and ``rows`` is a list of ``[label, score, x1, y1, x2, y2]`` entries
        (score rounded to 3 decimals, coordinates to 2). When ``pil_img`` is
        ``None`` the result is ``(None, [])``.
    """
    # Without an image there is nothing to detect; return empty outputs
    # instead of failing on ``None.convert``.
    if pil_img is None:
        return None, []

    pil_img = pil_img.convert("RGB")
    b1, s1, l1 = predict_tf(pil_img, proc_yolos, model_yolos, yolos_conf)
    b2, s2, l2 = predict_tf(pil_img, proc_detr, model_detr, detr_conf)
    be, se, le = ensemble_union_nms(
        b1, s1, l1, b2, s2, l2, w2=w2, iou_thr=iou_thr, score_thr=score_thr
    )

    # id2label (same class in both models: raccoon), so the YOLOS mapping
    # is used for every detection.
    id2label = model_yolos.config.id2label
    out_img = draw_boxes(pil_img, be, se, le, id2label)

    rows = []
    for box, score, label in zip(be, se, le):
        class_id = int(label)
        coords = [round(float(v), 2) for v in box.tolist()]
        name = id2label.get(class_id, str(class_id))
        rows.append([name, round(float(score), 3), *coords])
    return out_img, rows
# Gradio UI definition. The inputs are listed in the same order as the
# parameters of run(): image, YOLOS threshold, DETR threshold, DETR weight,
# NMS IoU threshold, post-ensemble score threshold. The outputs are the
# annotated image and a table with one row per detection.
demo = gr.Interface(
    fn=run,
    inputs=[
        gr.Image(type="pil", label="Imagen"),
        gr.Slider(minimum=0.05, maximum=0.9, value=0.5, step=0.05, label="YOLOS conf"),
        gr.Slider(minimum=0.05, maximum=0.9, value=0.5, step=0.05, label="DETR conf"),
        gr.Slider(minimum=0.1, maximum=1.5, value=0.8, step=0.05, label="Peso DETR (w2)"),
        gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.05, label="NMS IoU"),
        gr.Slider(
            minimum=0.05,
            maximum=0.9,
            value=0.25,
            step=0.05,
            label="Score mínimo (post-ensemble)",
        ),
    ],
    outputs=[
        gr.Image(type="pil", label="Ensemble (NMS)"),
        gr.Dataframe(
            headers=["label", "score", "x1", "y1", "x2", "y2"],
            label="Detecciones",
        ),
    ],
    title="Ensemble YOLOS + DETR con Non-Maximum Suppression",
)
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()