| import gradio as gr |
| import torch |
| from torchvision.ops import nms |
| from PIL import Image, ImageDraw |
|
|
| from transformers import AutoImageProcessor, AutoModelForObjectDetection |
|
|
# Run inference on the GPU when PyTorch can see one; otherwise fall back to CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Repository identifiers handed to ``from_pretrained`` for the two detectors.
YOLOS_REPO_ID = "adcelis/yolo_finetuned_raccoon"
DETR_REPO_ID = "adcelis/detr_finetuned_raccoon"


def _load_detector(repo_id):
    """Load the image processor and detection model stored under *repo_id*.

    The model is moved to ``DEVICE`` and switched to evaluation mode before
    being returned as ``(processor, model)``.
    """
    processor = AutoImageProcessor.from_pretrained(repo_id)
    model = AutoModelForObjectDetection.from_pretrained(repo_id).to(DEVICE)
    model.eval()
    return processor, model


# Both detectors are loaded once, at import time, in this order.
proc_yolos, model_yolos = _load_detector(YOLOS_REPO_ID)
proc_detr, model_detr = _load_detector(DETR_REPO_ID)
|
|
@torch.no_grad()
def predict_tf(pil_img, processor, model, conf):
    """Run one detector on a single image and return its detections on the CPU.

    Args:
        pil_img: Image to analyse; its ``size`` is read as ``(width, height)``.
        processor: Image processor paired with ``model``; used both to build
            the model inputs and to post-process the raw outputs.
        model: Detection model, called with the processor's output.
        conf: Score threshold forwarded to ``post_process_object_detection``.

    Returns:
        A ``(boxes, scores, labels)`` tuple of CPU tensors for the image.
    """
    batch = processor(images=[pil_img], return_tensors="pt").to(DEVICE)
    raw_outputs = model(**batch)

    # One row per image, given as (height, width), so the post-processing
    # step can work with the original image size.
    width, height = pil_img.size
    sizes = torch.tensor([[height, width]], device=DEVICE)

    per_image = processor.post_process_object_detection(
        raw_outputs, threshold=conf, target_sizes=sizes
    )
    detections = per_image[0]  # a single image was submitted
    return (
        detections["boxes"].cpu(),
        detections["scores"].cpu(),
        detections["labels"].cpu(),
    )
|
|
def ensemble_union_nms(boxes1, scores1, labels1, boxes2, scores2, labels2,
                       w2=0.8, iou_thr=0.5, score_thr=0.25):
    """Merge two detectors' outputs and de-duplicate them with per-class NMS.

    The detections of both models are concatenated, with the second model's
    scores multiplied by ``w2``. Entries scoring below ``score_thr`` are
    dropped, then ``nms`` is applied separately to each label value so boxes
    with different labels never suppress each other.

    Args:
        boxes1, scores1, labels1: Detections of the first model.
        boxes2, scores2, labels2: Detections of the second model.
        w2: Multiplier applied to the second model's scores before merging.
        iou_thr: IoU threshold passed to ``nms``.
        score_thr: Minimum (weighted) score a detection needs to be kept.

    Returns:
        ``(boxes, scores, labels)`` of the surviving detections, ordered by
        descending score. If nothing passes ``score_thr`` the empty filtered
        tensors are returned as they are.
    """
    merged_boxes = torch.cat((boxes1, boxes2), dim=0)
    merged_scores = torch.cat((scores1, scores2 * w2), dim=0)
    merged_labels = torch.cat((labels1, labels2), dim=0)

    # Discard low-confidence detections before running NMS.
    confident = merged_scores >= score_thr
    boxes = merged_boxes[confident]
    scores = merged_scores[confident]
    labels = merged_labels[confident]
    if boxes.numel() == 0:
        return boxes, scores, labels

    # Run NMS inside each class and remember the surviving global indices.
    survivors = []
    for class_id in labels.unique():
        members = torch.where(labels == class_id)[0]
        winners = nms(boxes[members], scores[members], iou_thr)
        survivors.append(members[winners])

    selected = torch.cat(survivors)
    ranking = torch.argsort(scores[selected], descending=True)
    selected = selected[ranking]
    return boxes[selected], scores[selected], labels[selected]
|
|
def draw_boxes(pil_img, boxes, scores, labels, id2label):
    """Return a copy of *pil_img* with every detection drawn on it.

    Each detection gets a green rectangle (2 px wide) and a black caption of
    the form ``"<name> <score>"`` anchored at the box's first corner. The
    input image itself is left untouched.

    Args:
        pil_img: Image to annotate (copied before drawing).
        boxes: Iterable of 4-element tensors with the rectangle coordinates.
        scores: Iterable of scores, parallel to ``boxes``.
        labels: Iterable of class ids, parallel to ``boxes``.
        id2label: Mapping from integer class id to display name; ids missing
            from it are shown as their number.

    Returns:
        The annotated copy of the image.
    """
    annotated = pil_img.copy()
    canvas = ImageDraw.Draw(annotated)
    for box, score, label in zip(boxes, scores, labels):
        left, top, right, bottom = map(float, box.tolist())
        class_id = int(label)
        class_name = id2label.get(class_id, str(class_id))
        caption = f"{class_name} {float(score):.2f}"
        canvas.rectangle((left, top, right, bottom), outline="green", width=2)
        canvas.text((left, top), caption, fill="black")
    return annotated
|
|
def run(pil_img, yolos_conf, detr_conf, w2, iou_thr, score_thr):
    """Gradio callback: detect with both models, ensemble, and render results.

    Args:
        pil_img: Uploaded image, or ``None`` when the user submitted nothing.
        yolos_conf: Confidence threshold for the YOLOS detector.
        detr_conf: Confidence threshold for the DETR detector.
        w2: Weight applied to DETR scores inside the ensemble.
        iou_thr: IoU threshold used by the ensemble's NMS step.
        score_thr: Minimum score a detection needs after weighting.

    Returns:
        ``(image, rows)`` where ``image`` is the annotated picture and ``rows``
        is a list of ``[label, score, x1, y1, x2, y2]`` entries with the score
        rounded to 3 decimals and the coordinates to 2.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard an empty submission fails with an opaque
    # AttributeError on ``None.convert``; gr.Error surfaces a readable message
    # in the UI instead. The text is Spanish to match the interface labels.
    if pil_img is None:
        raise gr.Error("Sube una imagen antes de ejecutar la detección.")

    pil_img = pil_img.convert("RGB")

    b1, s1, l1 = predict_tf(pil_img, proc_yolos, model_yolos, yolos_conf)
    b2, s2, l2 = predict_tf(pil_img, proc_detr, model_detr, detr_conf)

    be, se, le = ensemble_union_nms(
        b1, s1, l1, b2, s2, l2, w2=w2, iou_thr=iou_thr, score_thr=score_thr
    )

    # NOTE(review): label names come from the YOLOS config only; this assumes
    # both checkpoints share the same id -> label mapping. Confirm.
    id2label = model_yolos.config.id2label
    out_img = draw_boxes(pil_img, be, se, le, id2label)

    rows = [
        [
            id2label.get(int(label), str(int(label))),
            round(float(score), 3),
            *(round(float(coord), 2) for coord in box.tolist()),
        ]
        for box, score, label in zip(be, se, le)
    ]
    return out_img, rows
|
|
# Input widgets, in the same order as the parameters of ``run``.
_demo_inputs = [
    gr.Image(label="Imagen", type="pil"),
    gr.Slider(minimum=0.05, maximum=0.9, step=0.05, value=0.5, label="YOLOS conf"),
    gr.Slider(minimum=0.05, maximum=0.9, step=0.05, value=0.5, label="DETR conf"),
    gr.Slider(minimum=0.1, maximum=1.5, step=0.05, value=0.8, label="Peso DETR (w2)"),
    gr.Slider(minimum=0.1, maximum=0.9, step=0.05, value=0.5, label="NMS IoU"),
    gr.Slider(
        minimum=0.05,
        maximum=0.9,
        step=0.05,
        value=0.25,
        label="Score mínimo (post-ensemble)",
    ),
]

# Output widgets, matching the ``(image, rows)`` pair returned by ``run``.
_demo_outputs = [
    gr.Image(label="Ensemble (NMS)", type="pil"),
    gr.Dataframe(
        label="Detecciones",
        headers=["label", "score", "x1", "y1", "x2", "y2"],
    ),
]

demo = gr.Interface(
    fn=run,
    inputs=_demo_inputs,
    outputs=_demo_outputs,
    title="Ensemble YOLOS + DETR con Non-Maximum Suppression",
)


# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|