# Hugging Face Space app — huiña detector (page-scrape header "Spaces: Sleeping" removed).
| import math | |
| import os | |
| os.environ["MPLBACKEND"] = "Agg" # backend sin Qt / sin ventanas | |
| import gradio as gr | |
| import torch | |
| import torchvision | |
| import cv2 | |
| import numpy as np | |
| from torchvision.models.detection.faster_rcnn import FastRCNNPredictor | |
| from PIL import Image | |
| from ultralytics import SAM | |
| #grad camara | |
| from pytorch_grad_cam import EigenGradCAM | |
| from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget | |
| from pytorch_grad_cam.utils.image import show_cam_on_image | |
# Run inference on the GPU when available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def load():
    """Build the fine-tuned Faster R-CNN huiña detector and load its weights.

    Returns:
        torch.nn.Module: a ``fasterrcnn_resnet50_fpn`` model whose box
        predictor has been replaced with a 2-class head (background + huiña),
        with the fine-tuned weights from ``detection1.pth`` loaded, moved to
        ``device`` and set to eval mode.
    """
    state = torch.load("detection1.pth", map_location=device)
    # weights=None: the original used the deprecated ``pretrained=True``,
    # downloading COCO weights that were immediately overwritten by
    # load_state_dict below — skip the wasted download.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    # Swap the 91-class COCO head for a 2-class head (background + huiña).
    model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_feat, num_classes=2)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model
# Module-level singletons, loaded once at startup: the fine-tuned
# Faster R-CNN detector and the SAM 2.1 (base) segmentation model.
GR_MODEL = load()
SAM_MODEL = SAM('sam2.1_b.pt')
def predict(image, thr=0.5):
    """Detect huiñas in a camera-trap image; segment and explain the detections.

    Pipeline: Faster R-CNN detection (boxes above ``thr``), SAM segmentation
    on the detected boxes, and an EigenGradCAM heatmap for the detections.

    Args:
        image: RGB image as an (H, W, 3) numpy array (from the Gradio widget).
        thr: minimum detection score for a box to be kept.

    Returns:
        tuple: (RGB image annotated with boxes/scores,
                SAM-masked image,
                Grad-CAM overlay or ``None`` when nothing was detected).
    """
    rgb_image = Image.fromarray(image).convert('RGB')
    grad_img_input = np.asarray(rgb_image)
    grad_img = None
    cv_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    cv_copy_image = cv_image.copy()
    # Renamed from ``input`` (shadowed the builtin).
    img_tensor = torchvision.transforms.ToTensor()(rgb_image).to(device=device)
    grad_input = img_tensor.clone()
    with torch.no_grad():
        predictions = GR_MODEL([img_tensor])[0]
    boxes = predictions["boxes"]
    scores = predictions["scores"]
    labels = predictions["labels"]
    bounding_boxes = []
    labels_grad = []
    for box, score, label in zip(boxes, scores, labels):
        if score < thr:
            continue
        x1, y1, x2, y2 = map(math.floor, box)
        cv_image = cv2.rectangle(cv_image, (x1, y1), (x2, y2),
                                 thickness=3, color=[0.0, 255.0, 0.0, 255.0])
        cv_image = cv2.putText(cv_image, f'Huina {score*100:.2f}% de probabilidad',
                               (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 3.0,
                               [0.0, 255.0, 0.0,], 3)
        bounding_boxes.append([x1, y1, x2, y2])
        labels_grad.append(int(label))
    if bounding_boxes:
        # SAM segmentation prompted with the detected boxes; union all masks
        # into a single (H, W) mask and black out everything outside it.
        with torch.no_grad():
            results = SAM_MODEL(source=cv_copy_image, bboxes=bounding_boxes)
        mask_data = np.zeros((cv_image.shape[0], cv_image.shape[1]))
        for mask in results[0].masks.data:
            mask_data = np.maximum(mask_data, mask.cpu().numpy())
        cv_copy_image = (cv_copy_image * mask_data[:, :, np.newaxis]).astype(np.uint8)
        # Grad-CAM needs gradients, so the detector must be put in train mode.
        # BUG FIX: the original never restored eval mode, so every later call
        # ran the no_grad forward pass above with the model in train mode
        # (torchvision detection models then require targets and raise).
        GR_MODEL.train()
        try:
            grad_cam = EigenGradCAM(
                model=GR_MODEL,
                target_layers=[GR_MODEL.backbone.body.layer4[-1]],
            )
            boxes_np = np.array(bounding_boxes, dtype=np.float32)
            labels_np = np.array(labels_grad, dtype=np.int64)
            targets = [FasterRCNNBoxScoreTarget(labels=labels_np,
                                                bounding_boxes=boxes_np,
                                                iou_threshold=0.4)]
            gray_cam = grad_cam(grad_input.unsqueeze(0), targets=targets)[0]
            grad_img = show_cam_on_image(grad_img_input.astype('float32') / 255.0,
                                         gray_cam, use_rgb=True)
        finally:
            GR_MODEL.eval()
    # NOTE: cv2.COLOR_RGB2BGR and COLOR_BGR2RGB are the same channel swap,
    # so both conversions below return RGB arrays for Gradio.
    return (cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB),
            cv2.cvtColor(cv_copy_image, cv2.COLOR_RGB2BGR),
            grad_img)
import json
# (The duplicate ``import gradio as gr`` that was here is redundant — gradio
# is already imported at the top of the file.)

# Load the static training metrics displayed in the UI.
# FIX: use a context manager (the original leaked the handle from
# ``open(..., mode='r+')``) and open read-only — nothing is written back.
with open('training-metrics.json', mode='r', encoding='utf-8') as metrics_file:
    STATIC_JSON = json.load(metrics_file)

for obj in STATIC_JSON['metrics']:
    # The metrics file stores what is displayed as AR@100 under the key
    # 'AP100' (presumably a training-script mislabel); rename it for display.
    if 'AP100' in obj:
        obj['AR100'] = obj.pop('AP100')
    # Round metrics to two decimals for display.
    obj['AP'] = round(obj['AP'], 2)
    obj['AP75'] = round(obj['AP75'], 2)
    obj['AR100'] = round(obj['AR100'], 2)
STATIC_JSON['best_score'] = round(STATIC_JSON['best_score'], 2)
# --- Gradio UI: layout, static training info, and event wiring -------------
# The nesting of Row/Column context managers defines the page layout, so
# statement order here is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Detector de Huiñas en cámaras trampa")
    # Training summary shown at the top of the page (user-facing, Spanish).
    gr.Markdown("""
**Detección de Huiñas con FAST R-CNN y PyTorch**
- Épocas de entrenamiento: 80
- Número de clases: 1
- Label: Huiña
- Métrica: AP@0.5 * 0.5 + AP@0.75 * 0.25 + AR@100 * 0.25
- Train: 85% (398 imágenes)
- Val: 10% (46 imágenes)
- Test: 5% (24 imágenes)
- Detección de gradiente con EigenGradCam
- Segmentación de máscara con SAM (Segment Anything Model)
""")
    with gr.Row():
        # Left column: input image, score threshold, buttons, metrics JSON.
        with gr.Column(scale=1):
            inp_img = gr.Image(type="numpy", label="Imagen")
            thr = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Threshold")
            with gr.Row():
                btn_submit = gr.Button("Submit", variant="primary")
                btn_clear = gr.Button("Clear")
            # Static JSON below the buttons (training metrics).
            gr.JSON(value=STATIC_JSON, label="Información de entrenamiento")
        # Right column: the three model outputs (boxes, SAM mask, Grad-CAM).
        with gr.Column(scale=1):
            out_bbox = gr.Image(type="numpy", label="Detección de bounding box")
            out_sam = gr.Image(type="numpy", label="Segmentation Anything (SAM)")
            out_cam = gr.Image(type="numpy", label="Eigen/Grad-CAM")
    # Clickable example images (hosted on GitHub) with preset thresholds;
    # cache_examples=False so they run the live pipeline on click.
    gr.Examples(
        examples=[
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/7b4bf384-01210183.JPG", 0.50],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/9b9e0260-01190727.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/5b66f2d4-01270042_2.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/4ebce977-01180239.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/1e4e1c7d-01260031.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/0a9c759f-01040340.JPG", 0.70],
        ],
        inputs=[inp_img, thr],
        fn=predict,
        outputs=[out_bbox, out_sam, out_cam],
        cache_examples=False,
        label="Ejemplos",
    )
    # Submit runs the full detection/segmentation/Grad-CAM pipeline.
    btn_submit.click(
        fn=predict,
        inputs=[inp_img, thr],
        outputs=[out_bbox, out_sam, out_cam],
    )
    def _clear():
        # Reset the input image, the slider (back to its 0.5 default) and
        # the three output panes.
        return None, 0.5, None, None, None
    btn_clear.click(
        fn=_clear,
        inputs=[],
        outputs=[inp_img, thr, out_bbox, out_sam, out_cam],
    )
# Start the Gradio server (blocking call).
demo.launch()