File size: 6,229 Bytes
9ef518d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7dd99cf
9ef518d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e9ffb4
 
 
 
 
 
9ef518d
 
 
 
 
6647282
9ef518d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import math

import os
os.environ["MPLBACKEND"] = "Agg"   # backend sin Qt / sin ventanas

import gradio as gr

import torch
import torchvision
import cv2
import numpy as np
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import  Image
from ultralytics import SAM
#grad camara
from pytorch_grad_cam import EigenGradCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def load(weights_path="detection1.pth", num_classes=2):
    """Build the fine-tuned Faster R-CNN huiña detector and load its weights.

    Args:
        weights_path: path to the saved ``state_dict`` checkpoint.
        num_classes: number of detection classes, background included.

    Returns:
        The model on the global ``device``, in eval mode.
    """
    state = torch.load(weights_path, map_location=device)
    # pretrained=False: the COCO weights would be fully replaced by the
    # strict load_state_dict below, so downloading them is wasted work.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    # Swap the stock box predictor for one matching our class count.
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_feat, num_classes=num_classes)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model


# Global model instances, built once at import time so every request reuses them.
GR_MODEL = load()  # fine-tuned Faster R-CNN detector (eval mode)
SAM_MODEL = SAM('sam2.1_b.pt')  # Segment Anything model used for mask extraction

def predict(image, thr=0.5):
    """Run detection, SAM segmentation and Grad-CAM on a single image.

    Args:
        image: numpy image array from the Gradio image component.
        thr: minimum confidence score for a detection to be kept.

    Returns:
        Tuple of (RGB image with boxes/labels drawn,
        masked copy of the image from SAM segmentation,
        Grad-CAM heatmap overlay, or None when nothing was detected).
    """
    rgb_image = Image.fromarray(image).convert('RGB')
    grad_img_input = np.asarray(rgb_image)
    grad_img = None
    cv_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    cv_copy_image = cv_image.copy()
    # Renamed from `input` to avoid shadowing the builtin.
    img_tensor = torchvision.transforms.ToTensor()(rgb_image).to(device=device)
    grad_input = img_tensor.clone()
    with torch.no_grad():
        predictions = GR_MODEL([img_tensor])[0]

    boxes = predictions["boxes"]
    scores = predictions["scores"]
    labels = predictions["labels"]

    bounding_boxes = []
    labels_grad = []

    for box, score, label in zip(boxes, scores, labels):
        if score < thr:
            continue

        x1, y1, x2, y2 = map(math.floor, box.tolist())
        cv_image = cv2.rectangle(cv_image, (x1, y1), (x2, y2), thickness=3, color=[0.0, 255.0, 0.0, 255.0])
        cv_image = cv2.putText(cv_image, f'Huina {score*100:.2f}% de probabilidad', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 3.0, [0.0, 255.0, 0.0,], 3)
        bounding_boxes.append([x1, y1, x2, y2])
        labels_grad.append(int(label))

    if bounding_boxes:
        # SAM segmentation prompted with the detector's boxes; the union of all
        # instance masks is used to black out the background.
        with torch.no_grad():
            results = SAM_MODEL(source=cv_copy_image, bboxes=bounding_boxes)
        mask_data = np.zeros((cv_image.shape[0], cv_image.shape[1]))
        for mask in results[0].masks.data:
            # assumes each mask is HxW matching the input image — TODO confirm
            mask_data = np.maximum(mask_data, mask.cpu().numpy())
        cv_copy_image = (cv_copy_image * mask_data[:, :, np.newaxis]).astype(np.uint8)

        # EigenGradCAM needs gradients through the detection head, hence train().
        GR_MODEL.train()
        try:
            grad_cam = EigenGradCAM(
                model=GR_MODEL,
                target_layers=[GR_MODEL.backbone.body.layer4[-1]],
            )
            boxes_np = np.array(bounding_boxes, dtype=np.float32)
            labels_np = np.array(labels_grad, dtype=np.int64)

            targets = [FasterRCNNBoxScoreTarget(labels=labels_np, bounding_boxes=boxes_np, iou_threshold=0.4)]
            gray_cam = grad_cam(grad_input.unsqueeze(0), targets=targets)[0]
            grad_img = show_cam_on_image(grad_img_input.astype('float32') / 255.0, gray_cam, use_rgb=True)
        finally:
            # BUG FIX: the model was left in train mode after Grad-CAM, which
            # makes the NEXT call fail — torchvision detection models in train
            # mode require targets. Always restore inference mode.
            GR_MODEL.eval()

    # cv_image / cv_copy_image are BGR; convert back to RGB for Gradio.
    return cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB), cv2.cvtColor(cv_copy_image, cv2.COLOR_BGR2RGB), grad_img

import gradio as gr
import json

# Static training-metrics panel data for the UI. Older metrics files stored the
# AR@100 value under the key "AP100"; migrate the key, then round every metric
# to two decimals for display.
with open('training-metrics.json', mode='r', encoding='utf-8') as fh:
    STATIC_JSON = json.load(fh)

for obj in STATIC_JSON['metrics']:
    if 'AP100' in obj:
        # Rename the legacy key in place.
        obj['AR100'] = obj.pop('AP100')
    obj['AP'] = round(obj['AP'], 2)
    obj['AP75'] = round(obj['AP75'], 2)
    obj['AR100'] = round(obj['AR100'], 2)

STATIC_JSON['best_score'] = round(STATIC_JSON['best_score'], 2)

# Gradio UI: image + threshold inputs on the left, the three model outputs
# (bounding boxes, SAM mask, Grad-CAM) on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Detector de Huiñas en cámaras trampa")
    gr.Markdown("""
**Detección de Huiñas con FAST R-CNN y PyTorch**
- Épocas de entrenamiento: 80  
- Número de clases: 1  
- Label: Huiña  
- Métrica: AP@0.5 * 0.5 + AP@0.75 * 0.25 + AR@100 * 0.25  
- Train: 85% (398 imágenes)  
- Val: 10% (46 imágenes)  
- Test: 5% (24 imágenes)  
- Detección de gradiente con EigenGradCam  
- Segmentación de máscara con SAM (Segment Anything Model)
""")
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: the source image and the detection confidence threshold
            # forwarded to predict().
            inp_img = gr.Image(type="numpy", label="Imagen")
            thr = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Threshold")

            with gr.Row():
                btn_submit = gr.Button("Submit", variant="primary")
                btn_clear = gr.Button("Clear")

            # Static training-metrics JSON shown below the buttons.
            gr.JSON(value=STATIC_JSON, label="Información de entrenamiento")

        with gr.Column(scale=1):
            # Outputs map 1:1 to predict()'s three return values.
            out_bbox = gr.Image(type="numpy", label="Detección de bounding box")
            out_sam  = gr.Image(type="numpy", label="Segmentation Anything (SAM)")
            out_cam  = gr.Image(type="numpy", label="Eigen/Grad-CAM")

    # Clickable example images (fetched by URL) with a preset threshold each.
    gr.Examples(
        examples=[
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/7b4bf384-01210183.JPG", 0.50],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/9b9e0260-01190727.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/5b66f2d4-01270042_2.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/4ebce977-01180239.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/1e4e1c7d-01260031.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/0a9c759f-01040340.JPG", 0.70],

        ],
        inputs=[inp_img, thr],
        fn=predict,
        outputs=[out_bbox, out_sam, out_cam],
        cache_examples=False,    
        label="Ejemplos",
    )

    btn_submit.click(
        fn=predict,
        inputs=[inp_img, thr],
        outputs=[out_bbox, out_sam, out_cam],
    )

    def _clear():
        """Reset the input image, threshold and all three output panes."""
        return None, 0.5, None, None, None

    btn_clear.click(
        fn=_clear,
        inputs=[],
        outputs=[inp_img, thr, out_bbox, out_sam, out_cam],
    )

demo.launch()