# Practica2 / app.py
# adcelis's picture
# Update app.py
# 9decefc verified
import gradio as gr
import torch
from torchvision.ops import nms
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection
# Run on CUDA when torch reports it as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Hub repository ids of the two fine-tuned detectors.
YOLOS_REPO_ID = "adcelis/yolo_finetuned_raccoon"
DETR_REPO_ID = "adcelis/detr_finetuned_raccoon"

# Load YOLOS: image processor plus detection model, moved to DEVICE and
# switched to evaluation mode.
proc_yolos = AutoImageProcessor.from_pretrained(YOLOS_REPO_ID)
model_yolos = AutoModelForObjectDetection.from_pretrained(YOLOS_REPO_ID)
model_yolos = model_yolos.to(DEVICE)
model_yolos.eval()

# Load DETR the same way.
proc_detr = AutoImageProcessor.from_pretrained(DETR_REPO_ID)
model_detr = AutoModelForObjectDetection.from_pretrained(DETR_REPO_ID)
model_detr = model_detr.to(DEVICE)
model_detr.eval()
@torch.no_grad()
def predict_tf(pil_img, processor, model, conf):
    """Run one detector on a single image and return its detections on CPU.

    Args:
        pil_img: Image whose ``size`` attribute is ``(width, height)``.
        processor: Image processor used both to build the model inputs and
            to post-process the raw model outputs.
        model: Detection model called with the processor's tensors.
        conf: Score threshold forwarded to the post-processing step.

    Returns:
        A ``(boxes, scores, labels)`` tuple of tensors moved to the CPU, as
        produced by ``processor.post_process_object_detection`` for the
        single image in the batch.
    """
    width, height = pil_img.size

    # Batch of one image, placed on the module-level DEVICE.
    batch = processor(images=[pil_img], return_tensors="pt").to(DEVICE)
    raw_outputs = model(**batch)

    # Target sizes are passed as (height, width), the reverse of PIL's size.
    sizes = torch.tensor([[height, width]], device=DEVICE)
    per_image = processor.post_process_object_detection(
        raw_outputs, threshold=conf, target_sizes=sizes
    )
    detections = per_image[0]
    return (
        detections["boxes"].cpu(),
        detections["scores"].cpu(),
        detections["labels"].cpu(),
    )
def ensemble_union_nms(boxes1, scores1, labels1, boxes2, scores2, labels2,
                       w2=0.8, iou_thr=0.5, score_thr=0.25):
    """Merge two detection sets and prune duplicates with per-class NMS.

    The second set's scores are multiplied by ``w2`` before merging. Merged
    detections scoring below ``score_thr`` are dropped, then NMS is applied
    separately for each label value, so only boxes sharing a label can
    suppress one another.

    Args:
        boxes1, scores1, labels1: Detections of the first model.
        boxes2, scores2, labels2: Detections of the second model.
        w2: Multiplier applied to ``scores2``.
        iou_thr: IoU threshold handed to ``nms``.
        score_thr: Minimum (weighted) score a detection needs to be kept.

    Returns:
        ``(boxes, scores, labels)`` of the surviving detections, ordered by
        descending score. When nothing passes ``score_thr`` the (empty)
        filtered tensors are returned as they are.
    """
    merged_boxes = torch.cat((boxes1, boxes2), dim=0)
    merged_scores = torch.cat((scores1, scores2 * w2), dim=0)
    merged_labels = torch.cat((labels1, labels2), dim=0)

    confident = merged_scores >= score_thr
    merged_boxes = merged_boxes[confident]
    merged_scores = merged_scores[confident]
    merged_labels = merged_labels[confident]

    # Nothing left after thresholding: skip NMS entirely.
    if merged_boxes.numel() == 0:
        return merged_boxes, merged_scores, merged_labels

    # Indices (into the merged tensors) of the members of each class.
    per_class = [
        torch.where(merged_labels == class_id)[0]
        for class_id in merged_labels.unique()
    ]
    # nms returns positions relative to the subset it was given, so map them
    # back through `members` to get indices into the merged tensors.
    survivors = torch.cat([
        members[nms(merged_boxes[members], merged_scores[members], iou_thr)]
        for members in per_class
    ])

    ranking = merged_scores[survivors].argsort(descending=True)
    order = survivors[ranking]
    return merged_boxes[order], merged_scores[order], merged_labels[order]
def draw_boxes(pil_img, boxes, scores, labels, id2label):
    """Return a copy of the image with each detection outlined and captioned.

    Args:
        pil_img: Source image; it is copied, not modified.
        boxes: Iterable of boxes, each unpacked as ``(x1, y1, x2, y2)``.
        scores: Iterable of scores, one per box.
        labels: Iterable of integer-convertible class ids, one per box.
        id2label: Mapping from class id to display name; ids missing from
            it are shown as their number.

    Returns:
        The annotated copy of ``pil_img``.
    """
    canvas = pil_img.copy()
    pen = ImageDraw.Draw(canvas)

    for box, score, label in zip(boxes, scores, labels):
        left, top, right, bottom = (float(v) for v in box.tolist())
        pen.rectangle((left, top, right, bottom), outline="green", width=2)

        class_id = int(label)
        class_name = id2label.get(class_id, str(class_id))
        # Caption is anchored at the box's top-left corner.
        pen.text((left, top), f"{class_name} {float(score):.2f}", fill="black")

    return canvas
def run(pil_img, yolos_conf, detr_conf, w2, iou_thr, score_thr):
    """Gradio callback: detect with YOLOS and DETR, ensemble, and render.

    Args:
        pil_img: Input image from the UI, or ``None`` when the user submits
            without providing one.
        yolos_conf: Score threshold for the YOLOS predictions.
        detr_conf: Score threshold for the DETR predictions.
        w2: Weight multiplying the DETR scores in the ensemble.
        iou_thr: IoU threshold used by the ensemble's NMS.
        score_thr: Minimum score kept after the ensemble weighting.

    Returns:
        ``(image, rows)``: the annotated image and one
        ``[label, score, x1, y1, x2, y2]`` row per surviving detection.
        ``(None, [])`` when no image was provided.
    """
    # Without an image there is nothing to detect; return empty outputs
    # instead of failing on ``None.convert``.
    if pil_img is None:
        return None, []

    pil_img = pil_img.convert("RGB")

    b1, s1, l1 = predict_tf(pil_img, proc_yolos, model_yolos, yolos_conf)
    b2, s2, l2 = predict_tf(pil_img, proc_detr, model_detr, detr_conf)

    be, se, le = ensemble_union_nms(
        b1, s1, l1, b2, s2, l2,
        w2=w2, iou_thr=iou_thr, score_thr=score_thr,
    )

    # Label names come from YOLOS only (both models share the same class:
    # raccoon).
    id2label = model_yolos.config.id2label
    out_img = draw_boxes(pil_img, be, se, le, id2label)

    rows = []
    for box, score, label in zip(be, se, le):
        x1, y1, x2, y2 = (round(float(v), 2) for v in box.tolist())
        class_id = int(label)
        rows.append([
            id2label.get(class_id, str(class_id)),
            round(float(score), 3),
            x1, y1, x2, y2,
        ])
    return out_img, rows
# Input widgets, listed in the positional order of run()'s parameters.
_INPUT_COMPONENTS = [
    gr.Image(type="pil", label="Imagen"),
    gr.Slider(0.05, 0.9, value=0.5, step=0.05, label="YOLOS conf"),
    gr.Slider(0.05, 0.9, value=0.5, step=0.05, label="DETR conf"),
    gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="Peso DETR (w2)"),
    gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="NMS IoU"),
    gr.Slider(0.05, 0.9, value=0.25, step=0.05, label="Score mínimo (post-ensemble)"),
]

# Output widgets, matching run()'s (image, rows) return value.
_OUTPUT_COMPONENTS = [
    gr.Image(type="pil", label="Ensemble (NMS)"),
    gr.Dataframe(
        headers=["label", "score", "x1", "y1", "x2", "y2"],
        label="Detecciones",
    ),
]

demo = gr.Interface(
    fn=run,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENTS,
    title="Ensemble YOLOS + DETR con Non-Maximum Suppression",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()