# DeteccionHuina / app.py
# Author: SebastianSanchez6293 (commit 8e9ffb4, "github images")
import math
import os
os.environ["MPLBACKEND"] = "Agg" # headless matplotlib backend (no Qt / no windows)
import gradio as gr
import torch
import torchvision
import cv2
import numpy as np
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
from ultralytics import SAM
#grad camara
from pytorch_grad_cam import EigenGradCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # prefer GPU when available
def load():
    """Build and return the fine-tuned Faster R-CNN huiña detector.

    Constructs a ResNet50-FPN Faster R-CNN, replaces its box predictor
    with a 2-class head (background + huiña), restores the trained
    weights from ``detection1.pth`` and returns the model in eval mode
    on ``device``.
    """
    state = torch.load("detection1.pth", map_location=device)
    # weights=None: the checkpoint below fully overwrites the weights, so
    # downloading COCO-pretrained ones (the old, deprecated pretrained=True)
    # was pure wasted bandwidth/startup time.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_feat, num_classes=2)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model
GR_MODEL = load()  # Faster R-CNN huiña detector, loaded once at startup
SAM_MODEL = SAM('sam2.1_b.pt')  # SAM 2.1 base checkpoint for mask segmentation
def predict(image, thr=0.5):
    """Run detection, SAM segmentation and Eigen-Grad-CAM on one image.

    Parameters
    ----------
    image : np.ndarray
        RGB image as delivered by the Gradio Image component.
    thr : float
        Minimum detection score for a box to be kept.

    Returns
    -------
    tuple
        (RGB image with boxes drawn, RGB image masked by SAM,
        Grad-CAM overlay or None when nothing was detected).
    """
    rgb_image = Image.fromarray(image).convert('RGB')
    grad_img_input = np.asarray(rgb_image)
    grad_img = None
    cv_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    cv_copy_image = cv_image.copy()
    # Renamed from `input` to avoid shadowing the builtin.
    img_tensor = torchvision.transforms.ToTensor()(rgb_image).to(device=device)
    grad_input = img_tensor.clone()
    with torch.no_grad():
        predictions = GR_MODEL([img_tensor])[0]
    boxes = predictions["boxes"]
    scores = predictions["scores"]
    labels = predictions["labels"]
    print(scores)  # debug: raw detection scores
    bounding_boxes = []
    labels_grad = []
    for box, score, label in zip(boxes, scores, labels):
        if score < thr:
            continue
        x1, y1, x2, y2 = map(math.floor, box)
        cv_image = cv2.rectangle(cv_image, (x1, y1), (x2, y2), thickness=3, color=[0.0, 255.0, 0.0, 255.0])
        cv_image = cv2.putText(cv_image, f'Huina {score*100:.2f}% de probabilidad', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 3.0, [0.0, 255.0, 0.0,], 3)
        bounding_boxes.append([x1, y1, x2, y2])
        labels_grad.append(int(label))
    if bounding_boxes:
        # Segment the detections with SAM, then black out everything
        # outside the union of the predicted masks.
        with torch.no_grad():
            results = SAM_MODEL(source=cv_copy_image, bboxes=bounding_boxes)
        mask_data = np.zeros((cv_image.shape[0], cv_image.shape[1]))
        for mask in results[0].masks.data:
            mask_data = np.maximum(mask_data, mask.cpu().numpy())
        cv_copy_image = (cv_copy_image * mask_data[:, :, np.newaxis]).astype(np.uint8)

        # EigenGradCAM needs gradients, which torchvision's Faster R-CNN
        # only produces in train mode.  BUG FIX: the original never switched
        # back, leaving the model in train mode — every subsequent call then
        # fails because Faster R-CNN in train mode requires targets.
        GR_MODEL.train()
        try:
            grad_cam = EigenGradCAM(
                model=GR_MODEL,
                target_layers=[GR_MODEL.backbone.body.layer4[-1]],
            )
            boxes_np = np.array(bounding_boxes, dtype=np.float32)
            labels_np = np.array(labels_grad, dtype=np.int64)
            targets = [FasterRCNNBoxScoreTarget(labels=labels_np, bounding_boxes=boxes_np, iou_threshold=0.4)]
            gray_cam = grad_cam(grad_input.unsqueeze(0), targets=targets)[0]
            grad_img = show_cam_on_image(grad_img_input.astype('float32') / 255.0, gray_cam, use_rgb=True)
        finally:
            GR_MODEL.eval()
    # Both images are BGR at this point; convert back to RGB for Gradio.
    # (Was COLOR_RGB2BGR on the second image — numerically the same channel
    # swap, but BGR2RGB states the actual intent.)
    return cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB), cv2.cvtColor(cv_copy_image, cv2.COLOR_BGR2RGB), grad_img
import gradio as gr
import json
# Training metrics displayed as a static JSON panel in the UI.
# Open read-only and close the handle (the original used mode='r+' and
# leaked the file object).
with open('training-metrics.json', mode='r', encoding='utf-8') as fh:
    STATIC_JSON = json.load(fh)
for obj in STATIC_JSON['metrics']:
    # The checkpoint stored "average recall @ 100" under the key "AP100";
    # rename it to "AR100" for display.
    if 'AP100' in obj:
        obj['AR100'] = obj.pop('AP100')
    # Round all shown metrics to two decimals.
    obj['AP'] = round(obj['AP'], 2)
    obj['AP75'] = round(obj['AP75'], 2)
    obj['AR100'] = round(obj['AR100'], 2)
STATIC_JSON['best_score'] = round(STATIC_JSON['best_score'], 2)
# ----------------------------------------------------------------------
# Gradio UI: image + threshold in, three annotated images out.
# ----------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Detector de Huiñas en cámaras trampa")
    gr.Markdown("""
**Detección de Huiñas con FAST R-CNN y PyTorch**
- Épocas de entrenamiento: 80
- Número de clases: 1
- Label: Huiña
- Métrica: AP@0.5 * 0.5 + AP@0.75 * 0.25 + AR@100 * 0.25
- Train: 85% (398 imágenes)
- Val: 10% (46 imágenes)
- Test: 5% (24 imágenes)
- Detección de gradiente con EigenGradCam
- Segmentación de máscara con SAM (Segment Anything Model)
""")

    with gr.Row():
        # Left column: inputs, action buttons and the static metrics panel.
        with gr.Column(scale=1):
            inp_img = gr.Image(type="numpy", label="Imagen")
            thr = gr.Slider(0.1, 0.99, value=0.5, step=0.01, label="Threshold")
            with gr.Row():
                btn_submit = gr.Button("Submit", variant="primary")
                btn_clear = gr.Button("Clear")
            # Fixed JSON shown below the buttons.
            gr.JSON(value=STATIC_JSON, label="Información de entrenamiento")
        # Right column: the three result images.
        with gr.Column(scale=1):
            out_bbox = gr.Image(type="numpy", label="Detección de bounding box")
            out_sam = gr.Image(type="numpy", label="Segmentation Anything (SAM)")
            out_cam = gr.Image(type="numpy", label="Eigen/Grad-CAM")

    gr.Examples(
        examples=[
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/7b4bf384-01210183.JPG", 0.50],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/9b9e0260-01190727.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/5b66f2d4-01270042_2.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/4ebce977-01180239.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/1e4e1c7d-01260031.JPG", 0.70],
            ["https://raw.githubusercontent.com/SebastianSanchez6293/zoomancia-images/refs/heads/main/0a9c759f-01040340.JPG", 0.70],
        ],
        inputs=[inp_img, thr],
        fn=predict,
        outputs=[out_bbox, out_sam, out_cam],
        cache_examples=False,
        label="Ejemplos",
    )

    btn_submit.click(fn=predict, inputs=[inp_img, thr], outputs=[out_bbox, out_sam, out_cam])

    def _clear():
        # Reset the input image, the threshold slider and all three outputs.
        return None, 0.5, None, None, None

    btn_clear.click(fn=_clear, inputs=[], outputs=[inp_img, thr, out_bbox, out_sam, out_cam])

demo.launch()