"""Gradio app: Huiña (kodkod) detection in camera-trap images.

Pipeline per image:
  1. Fine-tuned Faster R-CNN (ResNet-50 FPN, 2 classes) detects bounding boxes.
  2. SAM (Segment Anything) segments the detected boxes.
  3. EigenGradCAM visualises where the detector looked.
"""

import json
import math
import os

os.environ["MPLBACKEND"] = "Agg"  # headless matplotlib backend (no Qt / no windows)

import cv2
import gradio as gr
import numpy as np
import torch
import torchvision
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from ultralytics import SAM

# Grad-CAM utilities
from pytorch_grad_cam import EigenGradCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image

device = 'cuda' if torch.cuda.is_available() else 'cpu'


def load():
    """Build the fine-tuned Faster R-CNN detector and load its weights.

    The head is replaced with a 2-class predictor (background + Huiña)
    before loading the state dict from ``detection1.pth``.

    Returns:
        The model on ``device``, in eval mode.
    """
    state = torch.load("detection1.pth", map_location=device)
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_feat, num_classes=2)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model


GR_MODEL = load()
SAM_MODEL = SAM('sam2.1_b.pt')


def predict(image, thr=0.5):
    """Run detection, SAM segmentation and EigenGradCAM on one image.

    Args:
        image: H×W×C numpy array as delivered by the Gradio image component.
        thr: minimum detection score for a box to be kept.

    Returns:
        Tuple of three images: the RGB bounding-box overlay, the SAM-masked
        image, and the Grad-CAM heat map (``None`` when nothing is detected).
    """
    rgb_image = Image.fromarray(image).convert('RGB')
    grad_img_input = np.asarray(rgb_image)
    grad_img = None
    cv_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    cv_copy_image = cv_image.copy()

    # NOTE: renamed from `input`, which shadowed the builtin.
    img_tensor = torchvision.transforms.ToTensor()(rgb_image).to(device=device)
    grad_input = img_tensor.clone()

    with torch.no_grad():
        predictions = GR_MODEL([img_tensor])[0]

    boxes = predictions["boxes"]
    scores = predictions["scores"]
    labels = predictions["labels"]
    print(scores)

    bounding_boxes = []
    labels_grad = []
    for box, score, label in zip(boxes, scores, labels):
        if score < thr:
            continue
        x1, y1, x2, y2 = map(math.floor, box)
        cv_image = cv2.rectangle(
            cv_image, (x1, y1), (x2, y2),
            thickness=3, color=[0.0, 255.0, 0.0, 255.0],
        )
        cv_image = cv2.putText(
            cv_image,
            f'Huina {score*100:.2f}% de probabilidad',
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            3.0,
            [0.0, 255.0, 0.0,],
            3,
        )
        bounding_boxes.append([x1, y1, x2, y2])
        labels_grad.append(int(label))

    if bounding_boxes:
        # SAM segmentation prompted with the detected boxes; all instance
        # masks are merged into a single binary mask via element-wise max.
        with torch.no_grad():
            results = SAM_MODEL(source=cv_copy_image, bboxes=bounding_boxes)
        mask_data = np.zeros((cv_image.shape[0], cv_image.shape[1]))
        for mask in results[0].masks.data:
            mask_data = np.maximum(mask_data, mask.cpu().numpy())
        cv_copy_image = (cv_copy_image * mask_data[:, :, np.newaxis]).astype(np.uint8)

        # EigenGradCAM over the last ResNet block.
        # BUG FIX: the model previously stayed in train mode after this
        # branch, which breaks the next detection forward pass (torchvision
        # detection models require targets in train mode) — restore eval
        # mode in a finally block so a CAM failure cannot leak the state.
        GR_MODEL.train()
        try:
            grad_cam = EigenGradCAM(
                model=GR_MODEL,
                target_layers=[GR_MODEL.backbone.body.layer4[-1]],
            )
            boxes_np = np.array(bounding_boxes, dtype=np.float32)
            labels_np = np.array(labels_grad, dtype=np.int64)
            targets = [
                FasterRCNNBoxScoreTarget(
                    labels=labels_np, bounding_boxes=boxes_np, iou_threshold=0.4
                )
            ]
            gray_cam = grad_cam(grad_input.unsqueeze(0), targets=targets)[0]
            grad_img = show_cam_on_image(
                grad_img_input.astype('float32') / 255.0, gray_cam, use_rgb=True
            )
        finally:
            GR_MODEL.eval()

    return (
        cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB),
        cv2.cvtColor(cv_copy_image, cv2.COLOR_RGB2BGR),
        grad_img,
    )


def _load_metrics():
    """Load and normalise the training metrics shown in the UI.

    Renames the mislabeled 'AP100' key to 'AR100' and rounds the values for
    display. Uses a context manager — the original leaked a file handle
    opened with an unnecessary 'r+' mode.
    """
    with open('training-metrics.json', mode='r') as fh:
        data = json.load(fh)
    for obj in data['metrics']:
        if 'AP100' in obj:
            obj['AR100'] = obj.pop('AP100')
        obj['AP'] = round(obj['AP'], 2)
        obj['AP75'] = round(obj['AP75'], 2)
        obj['AR100'] = round(obj['AR100'], 2)
    data['best_score'] = round(data['best_score'], 2)
    return data


STATIC_JSON = _load_metrics()

with gr.Blocks() as demo:
    gr.Markdown("# Detector de Huiñas en cámaras trampa")
    gr.Markdown("""
    **Detección de Huiñas con FAST R-CNN y PyTorch**
    - Épocas de entrenamiento: 80
    - Número de clases: 1
    - Label: Huiña
    - Métrica: AP@0.5 * 0.5 + AP@0.75 * 0.25 + AR@100 * 0.25
    - Train: 85% (398 imágenes)
    - Val: 10% (46 imágenes)
    - Test: 5% (24 imágenes)
    - Detección de gradiente con EigenGradCam
    - Segmentación de máscara con SAM (Segment Anything Model)
    """)

    with gr.Row():
        with gr.Column(scale=1):
            inp_img = gr.Image(type="numpy", label="Imagen")
            thr = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Threshold")
            with gr.Row():
                btn_submit = gr.Button("Submit", variant="primary")
                btn_clear = gr.Button("Clear")
            # Static JSON panel below the buttons
            gr.JSON(value=STATIC_JSON, label="Información de entrenamiento")
        with gr.Column(scale=1):
            out_bbox = gr.Image(type="numpy", label="Detección de bounding box")
            out_sam = gr.Image(type="numpy", label="Segmentation Anything (SAM)")
            out_cam = gr.Image(type="numpy", label="Eigen/Grad-CAM")

    gr.Examples(
        examples=[
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/7b4bf384-01210183.JPG", 0.50],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/9b9e0260-01190727.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/5b66f2d4-01270042_2.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/4ebce977-01180239.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/1e4e1c7d-01260031.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/0a9c759f-01040340.JPG", 0.70],
        ],
        inputs=[inp_img, thr],
        fn=predict,
        outputs=[out_bbox, out_sam, out_cam],
        cache_examples=False,
        label="Ejemplos",
    )

    btn_submit.click(
        fn=predict,
        inputs=[inp_img, thr],
        outputs=[out_bbox, out_sam, out_cam],
    )

    def _clear():
        """Reset the input image and threshold and blank all three outputs."""
        return None, 0.5, None, None, None

    btn_clear.click(
        fn=_clear,
        inputs=[],
        outputs=[inp_img, thr, out_bbox, out_sam, out_cam],
    )


if __name__ == "__main__":
    demo.launch()