File size: 8,495 Bytes

a9c8060

import cv2
import torch
import torchvision.transforms as T
import torch.nn.functional as F
from torchvision.ops import nms
from model_architecture import ObjectDetectionModel

transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406],
                std=[0.299,0.224,0.225])])

def decode_target(grid_pred, S=None, image_size=384, conf_thresh=0.5, iou_thresh=0.5, wh_relative_to="image"):
    """

    Decodes predictions to boxes/labels/scores. Accepts either:

      - grid_pred: [S, S, A, 5+C]  (single image)

      - grid_pred: [B, S, S, A, 5+C] (batch)

    Returns:

      - list per image: each item is list of dicts { 'bbox':[x1,y1,x2,y2], 'label':int, 'score':float }

    Notes:

      - x,y,w,h are assumed to be logits that must be activated (sigmoid).

      - w,h interpreted according to wh_relative_to:

          "image" -> w_rel * image_size

          "cell"  -> w_rel * (image_size / S)

    """
  
    single_image = False
    if grid_pred.dim() == 4:         
        grid = grid_pred.unsqueeze(0) 
        single_image = True
    elif grid_pred.dim() == 5:      
        grid = grid_pred
    else:
        raise ValueError(f"Unexpected pred dim {grid_pred.dim()}, expected 4 or 5.")

    B = grid.shape[0]

    if S is None:
        S = grid.shape[1]

    C = grid.shape[-1] - 5
    results_batch = []

    for b in range(B):
        preds = grid[b]                    
        dets = []
        cell_size = image_size / S

        for j in range(S):
            for i in range(S):
                for a in range(preds.shape[2]):
                    cell = preds[j, i, a]   
                    if cell.numel() < 5:

                        continue

                    x_cell = torch.sigmoid(cell[0]).item()
                    y_cell = torch.sigmoid(cell[1]).item()
                    w_val  = cell[2].item()
                    h_val  = cell[3].item()
                    obj_p  = torch.sigmoid(cell[4]).item()


                    if C > 0:
                        class_logits = cell[5:5+C]
                        class_probs = F.softmax(class_logits, dim=0).detach().numpy()
                        class_id = int(class_probs.argmax())
                        class_p = float(class_probs[class_id])
                    else:
                        class_id = 0
                        class_p = 1.0

                    score = obj_p * class_p
                    if score < conf_thresh:
                        continue

                    x_abs = (i + x_cell) * cell_size
                    y_abs = (j + y_cell) * cell_size


                    if wh_relative_to == "cell":
                        w_abs = (torch.sigmoid(torch.tensor(w_val)).item()) * cell_size
                        h_abs = (torch.sigmoid(torch.tensor(h_val)).item()) * cell_size
                    else: 

                        w_abs = (torch.sigmoid(torch.tensor(w_val)).item()) * image_size
                        h_abs = (torch.sigmoid(torch.tensor(h_val)).item()) * image_size

                    x1 = x_abs - w_abs / 2.0
                    y1 = y_abs - h_abs / 2.0
                    x2 = x_abs + w_abs / 2.0
                    y2 = y_abs + h_abs / 2.0


                    x1 = max(0.0, min(image_size, x1))
                    y1 = max(0.0, min(image_size, y1))
                    x2 = max(0.0, min(image_size, x2))
                    y2 = max(0.0, min(image_size, y2))

                    if x2 <= x1 or y2 <= y1:
                        continue

                    dets.append({
                        "bbox":[x1, y1, x2, y2],
                        "label": int(class_id),
                        "score": float(score)
                    })


        if len(dets) > 0:
            boxes = torch.tensor([d["bbox"] for d in dets], dtype=torch.float32)
            scores = torch.tensor([d["score"] for d in dets], dtype=torch.float32)
            keep = nms(boxes, scores, iou_thresh).detach().tolist()
            dets = [dets[k] for k in keep]

        results_batch.append(dets)
    return results_batch[0] if single_image else results_batch
class ModelRunners :
    def __init__(self,Model) :
        self.__architecture = ObjectDetectionModel(1)
        self.__checkpoint = torch.load(Model)
        self.__architecture.load_state_dict(self.__checkpoint)
        self.__threshold = 0.25
        self.__imageSize = (720,480)
        self.__camera = 0 
    
    def set_threshold(self) :
        print("Note : for low threshold score can make model so sensitive at prediction ")
        print("Note : for hight threshold score can make model so selective at prediction")
        conf_threshold = float(input("set threshold score System (0.1/0.9) : "))

        self.__threshold = conf_threshold
    
    def __image_procces (self,pred_diction) :

        return decode_target(pred_diction,S=16,image_size=384,conf_thresh=self.__threshold) 
    
    def set_imagesize (self,size : tuple) :
        if not isinstance(size,tuple) :
            raise RuntimeError("the size parameters must be default (720,480)")
        self.__imageSize = size 

    def set_webcam (self,cam : int ) :
        if not isinstance(cam,int) :
            raise RuntimeError("WebCam input must be int (0,1)")
        if cam > 1 : 
            raise RuntimeError("Webcam just aivable for 0 internal and 1 external")
        self.__camera = cam 
    
    def main_model (self) :
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.__architecture.to(device)
        capture = cv2.VideoCapture(self.__camera)
        while True :
            ret,frame = capture.read()
            if not ret  :
                break 
            H,W = frame.shape[:2]
            img_rgb = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            img_resize = cv2.resize(img_rgb,(384,384))
            tensor = transform(img_resize).unsqueeze(0)  

            with torch.no_grad() :
                pred = self.__architecture(tensor)
            
            result = self.__image_procces(pred[0])
            scaleX,scaleY = W / 384 , H / 384 
            for r in result :
                xmin,ymin,xmax,ymax = r["bbox"]
                xmin,xmax = int(xmin * scaleX) , int(xmax * scaleX)
                ymin,ymax = int(ymin * scaleY), int (ymax * scaleY)

                conf = r["score"] 
                cv2.rectangle(frame,(xmin,ymin),(xmax,ymax),(0,255,0),2)
                cv2.putText(frame, f"{r['label']} {conf:.2f}",
                            (xmin, max(0, ymin - 5)), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            frame = cv2.resize(frame,(self.__imageSize))
            cv2.imshow("Live show cam",frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break 
        capture.release()
        cv2.destroyAllWindows()
    
    def run_model_at_video(self,path) :
        cap = cv2.VideoCapture(path)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.__architecture.to(device)
        while True : 
            ret,frame = cap.read()
            if not ret :
                break 
            H,W = frame.shape[:2]
            img_rgb = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            img_resize = cv2.resize(img_rgb,(384,384))
            tensor = transform(img_resize).unsqueeze(0)  
            with torch.no_grad() :
                pred = self.__architecture(tensor)

            result = self.__image_procces(pred[0])
            scaleX,scaleY = W / 384 , H / 384 
            for r in result :
                xmin,ymin,xmax,ymax = r["bbox"]
                xmin,xmax = int(xmin * scaleX) , int(xmax * scaleX)
                ymin,ymax = int(ymin * scaleY), int (ymax * scaleY)

                conf = r["score"] 
                cv2.rectangle(frame,(xmin,ymin),(xmax,ymax),(0,255,0),2)
                cv2.putText(frame, f"{r['label']} {conf:.2f}",
                            (xmin, max(0, ymin - 5)), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            frame = cv2.resize(frame,(self.__imageSize))
            cv2.imshow("Live show cam",frame)
            if cv2.waitKey(1) & 0xFF == ord('q') :
                break 
        cap.release()
        cv2.destroyAllWindows()