Spaces:

adcelis
/

Practica2

Sleeping

App Files Files Community

Practica2 / app.py

adcelis

Update app.py

9decefc verified 3 months ago

raw

history blame contribute delete

3.83 kB

	import gradio as gr
	import torch
	from torchvision.ops import nms
	from PIL import Image, ImageDraw

	from transformers import AutoImageProcessor, AutoModelForObjectDetection

	# Run on the GPU when torch reports one, otherwise fall back to the CPU.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	YOLOS_REPO_ID = "adcelis/yolo_finetuned_raccoon"
	DETR_REPO_ID = "adcelis/detr_finetuned_raccoon"


	def _load_detector(repo_id):
	    """Load the image processor and detection model stored under *repo_id*.

	    The model is moved to ``DEVICE`` and switched to eval mode before it
	    is returned.

	    Returns:
	        A ``(processor, model)`` pair.
	    """
	    processor = AutoImageProcessor.from_pretrained(repo_id)
	    detector = AutoModelForObjectDetection.from_pretrained(repo_id).to(DEVICE)
	    detector.eval()
	    return processor, detector


	# Load YOLOS
	proc_yolos, model_yolos = _load_detector(YOLOS_REPO_ID)

	# Load DETR
	proc_detr, model_detr = _load_detector(DETR_REPO_ID)

	def predict_tf(pil_img, processor, model, conf):
	    """Run one detector on a single image and return its detections.

	    Args:
	        pil_img: Input image; only its ``size`` attribute is read here
	            (presumably a PIL image, as passed by ``run``).
	        processor: Image processor paired with *model*; used both to build
	            the model inputs and to post-process the raw outputs.
	        model: Detection model, called with the processor's outputs.
	        conf: Score threshold forwarded to the post-processing step.

	    Returns:
	        A ``(boxes, scores, labels)`` tuple of tensors moved to the CPU.
	    """
	    # The target size is given as (size[1], size[0]), i.e. the reverse of
	    # ``pil_img.size`` (height first, then width, for a PIL image).
	    height, width = pil_img.size[1], pil_img.size[0]

	    # Gradients are never needed: the whole forward/post-process pass runs
	    # with autograd disabled.
	    with torch.no_grad():
	        batch = processor(images=[pil_img], return_tensors="pt").to(DEVICE)
	        raw_outputs = model(**batch)
	        sizes = torch.tensor([[height, width]], device=DEVICE)
	        detections = processor.post_process_object_detection(
	            raw_outputs, threshold=conf, target_sizes=sizes
	        )[0]  # single-image batch: keep the only result
	        return (
	            detections["boxes"].cpu(),
	            detections["scores"].cpu(),
	            detections["labels"].cpu(),
	        )

	def ensemble_union_nms(boxes1, scores1, labels1, boxes2, scores2, labels2,
	                       w2=0.8, iou_thr=0.5, score_thr=0.25):
	    """Merge two detection sets and de-duplicate them with per-class NMS.

	    The detections of both sources are concatenated, with the scores of
	    the second source multiplied by *w2*. Detections whose (weighted)
	    score is below *score_thr* are dropped, then NMS with IoU threshold
	    *iou_thr* is applied separately to each label value.

	    Args:
	        boxes1, scores1, labels1: Detections of the first source.
	        boxes2, scores2, labels2: Detections of the second source.
	        w2: Multiplier applied to ``scores2`` before merging.
	        iou_thr: IoU threshold passed to ``nms``.
	        score_thr: Minimum weighted score for a detection to be kept.

	    Returns:
	        ``(boxes, scores, labels)`` of the surviving detections, ordered
	        by descending score. If nothing passes *score_thr*, the empty
	        filtered tensors are returned as they are.
	    """
	    merged_boxes = torch.cat([boxes1, boxes2], dim=0)
	    merged_scores = torch.cat([scores1, scores2 * w2], dim=0)
	    merged_labels = torch.cat([labels1, labels2], dim=0)

	    # Drop low-confidence detections before running NMS.
	    confident = merged_scores >= score_thr
	    merged_boxes = merged_boxes[confident]
	    merged_scores = merged_scores[confident]
	    merged_labels = merged_labels[confident]
	    if merged_boxes.numel() == 0:
	        return merged_boxes, merged_scores, merged_labels

	    def _survivors(class_id):
	        # Indices (into the merged tensors) kept by NMS for one class.
	        members = torch.where(merged_labels == class_id)[0]
	        kept = nms(merged_boxes[members], merged_scores[members], iou_thr)
	        return members[kept]

	    selected = torch.cat([_survivors(c) for c in merged_labels.unique()])

	    # Present the final detections from highest to lowest score.
	    ranking = merged_scores[selected].argsort(descending=True)
	    selected = selected[ranking]
	    return merged_boxes[selected], merged_scores[selected], merged_labels[selected]

	def draw_boxes(pil_img, boxes, scores, labels, id2label):
	    """Return a copy of *pil_img* annotated with the given detections.

	    Each detection gets a green rectangle (2 px wide) and a black caption
	    "<name> <score>" drawn at the box's first corner. The input image is
	    not modified.

	    Args:
	        pil_img: Image to annotate; it is copied before drawing.
	        boxes: Iterable of 4-value boxes, each exposing ``tolist()``.
	        scores: Iterable of scores, one per box.
	        labels: Iterable of integer-convertible class ids, one per box.
	        id2label: Mapping from class id to display name; ids missing from
	            it are shown as their number.

	    Returns:
	        The annotated copy of the image.
	    """
	    canvas = pil_img.copy()
	    pen = ImageDraw.Draw(canvas)
	    for box, score, label in zip(boxes, scores, labels):
	        left, top, right, bottom = (float(v) for v in box.tolist())
	        pen.rectangle((left, top, right, bottom), outline="green", width=2)

	        class_id = int(label)
	        caption = f"{id2label.get(class_id, str(class_id))} {float(score):.2f}"
	        pen.text((left, top), caption, fill="black")
	    return canvas

	def run(pil_img, yolos_conf, detr_conf, w2, iou_thr, score_thr):
	    """Detect objects with both models, ensemble them, and render the result.

	    Args:
	        pil_img: Input image; may be ``None`` when the form is submitted
	            without an image.
	        yolos_conf: Score threshold for the YOLOS predictions.
	        detr_conf: Score threshold for the DETR predictions.
	        w2: Weight applied to the DETR scores in the ensemble.
	        iou_thr: IoU threshold used by the ensemble's NMS.
	        score_thr: Minimum score kept after weighting.

	    Returns:
	        ``(image, rows)``: the annotated image and one
	        ``[label, score, x1, y1, x2, y2]`` row per final detection.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Without this guard a missing image fails on ``.convert`` with an
	    # AttributeError, which the UI can only show as a generic error.
	    if pil_img is None:
	        raise gr.Error("Sube una imagen antes de ejecutar la detección.")

	    pil_img = pil_img.convert("RGB")

	    b1, s1, l1 = predict_tf(pil_img, proc_yolos, model_yolos, yolos_conf)
	    b2, s2, l2 = predict_tf(pil_img, proc_detr, model_detr, detr_conf)

	    be, se, le = ensemble_union_nms(
	        b1, s1, l1, b2, s2, l2, w2=w2, iou_thr=iou_thr, score_thr=score_thr
	    )

	    # Both models share the same class (raccoon), so the YOLOS id2label
	    # mapping is used for every detection.
	    id2label = model_yolos.config.id2label
	    out_img = draw_boxes(pil_img, be, se, le, id2label)

	    rows = []
	    for box, score, label in zip(be, se, le):
	        x1, y1, x2, y2 = [round(float(v), 2) for v in box.tolist()]
	        class_id = int(label)
	        name = id2label.get(class_id, str(class_id))
	        rows.append([name, round(float(score), 3), x1, y1, x2, y2])

	    return out_img, rows

	def _slider(label, low, high, default):
	    """Build a slider over [low, high] with the 0.05 step shared by all controls."""
	    return gr.Slider(minimum=low, maximum=high, value=default, step=0.05, label=label)


	# The input widgets are listed in the positional order of run()'s parameters.
	demo = gr.Interface(
	    fn=run,
	    title="Ensemble YOLOS + DETR con Non-Maximum Suppression",
	    inputs=[
	        gr.Image(type="pil", label="Imagen"),
	        _slider("YOLOS conf", 0.05, 0.9, 0.5),
	        _slider("DETR conf", 0.05, 0.9, 0.5),
	        _slider("Peso DETR (w2)", 0.1, 1.5, 0.8),
	        _slider("NMS IoU", 0.1, 0.9, 0.5),
	        _slider("Score mínimo (post-ensemble)", 0.05, 0.9, 0.25),
	    ],
	    outputs=[
	        gr.Image(type="pil", label="Ensemble (NMS)"),
	        gr.Dataframe(
	            headers=["label", "score", "x1", "y1", "x2", "y2"],
	            label="Detecciones",
	        ),
	    ],
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()