"""Gradio demo: detect objects in an uploaded image with DETR (ResNet-50)."""

import gradio as gr
import torch
from PIL import Image
from transformers import DetrForObjectDetection, DetrImageProcessor

# Single source of truth for the checkpoint so the processor and the model
# can never be loaded from different checkpoints by accident.
MODEL_NAME = "facebook/detr-resnet-50"

# Load the processor and the model once at import time; every call to
# detect_objects reuses these module-level instances.
processor = DetrImageProcessor.from_pretrained(MODEL_NAME)
model = DetrForObjectDetection.from_pretrained(MODEL_NAME)


def detect_objects(image: Image.Image, threshold: float = 0.9) -> str:
    """Run DETR on *image* and return a text report of the detections.

    Args:
        image: PIL image to analyse. ``None`` (nothing uploaded yet) is
            accepted and produces a prompt message instead of an error.
        threshold: Minimum confidence score a detection must reach to be
            included in the report.

    Returns:
        One line per detection with the label name, the score (two
        decimals) and the box coordinates rounded to two decimals, joined
        by newlines. If nothing passes the threshold, a hint to lower it is
        returned; if *image* is ``None``, a prompt to upload an image.
    """
    if image is None:
        return "Sube una imagen para iniciar."

    # Normalise the colour mode first: RGBA, palette or grayscale inputs do
    # not match the three-channel input the processor normalises against.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess and run inference; gradients are not needed for prediction.
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports size as (width, height); post-processing expects one
    # (height, width) row per image in the batch, i.e. shape (batch, 2).
    width, height = image.size
    target_sizes = torch.tensor([[height, width]], dtype=torch.int64)

    # Post-process: keep detections above the threshold, with boxes rescaled
    # to the original image size. Index 0 selects the only image in the batch.
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )[0]

    # Format one human-readable line per detection.
    lines = []
    for score, label, box in zip(
        results["scores"], results["labels"], results["boxes"]
    ):
        label_name = model.config.id2label[label.item()]
        box_rounded = [round(float(v), 2) for v in box.tolist()]
        lines.append(
            f"Objeto: {label_name}, Score: {float(score):.2f}, Box: {box_rounded}"
        )

    if not lines:
        return "Sin detecciones (prueba bajando el umbral)."
    return "\n".join(lines)


demo = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.Image(type="pil", label="Imagen"),
        gr.Slider(0, 1, value=0.9, step=0.05, label="Umbral"),
    ],
    outputs=gr.Textbox(label="Detecciones"),
    title="Detección de Objetos con Transformers (DETR)",
    description="Sube una imagen y descubre qué objetos puede detectar.",
)

if __name__ == "__main__":
    demo.launch()