"""Live webcam demo: classify each frame with a MobileNetV3 checkpoint.

Frames are read from the default camera, run through the model on the GPU,
and the per-class softmax confidences are averaged over fixed windows of
frames. The winning class of the most recent window is drawn on the preview.
Press ``q`` in the preview window to quit.
"""
import numpy as np
import torch
import cv2
from sympy.stats.rv import probability
from torch.nn import (
    Sequential,
    Conv2d,
    BatchNorm2d,
    Hardswish,
    ReLU,
    AdaptiveAvgPool2d,
    Hardsigmoid,
    Linear,
    Dropout,
)
from torchvision.io import read_image
from torchvision.models import MobileNet_V3_Small_Weights, MobileNetV3
from torchvision.models.mobilenetv3 import InvertedResidual
from torchvision.ops import Conv2dNormActivation, SqueezeExcitation

if __name__ == "__main__":
    # The checkpoint is a fully pickled module, so every layer class it
    # contains has to be allow-listed before torch.load will unpickle it.
    torch.serialization.add_safe_globals([
        MobileNetV3,
        Sequential,
        Conv2dNormActivation,
        Conv2d,
        BatchNorm2d,
        Hardswish,
        InvertedResidual,
        ReLU,
        SqueezeExcitation,
        AdaptiveAvgPool2d,
        Hardsigmoid,
        Linear,
        Dropout,
    ])

    mobilenetv3 = torch.load("/home/jarric/PycharmProjects/OrangeRecognizer/outputs/2025-04-21 12:08:02.020215/model_99_finetuned.pt")
    # eval() switches Dropout/BatchNorm to inference behaviour; without it a
    # checkpoint saved in training mode yields noisy single-frame predictions.
    mobilenetv3 = mobilenetv3.to("cuda").eval()
    mobilenet_v3_transforms = MobileNet_V3_Small_Weights.IMAGENET1K_V1.transforms().to("cuda")

    class_assigment = {'citrus canker': 0, 'healthy': 1, 'melanose': 2}
    # Inverse mapping: model output index -> human-readable class name.
    class_numbering = {v: k for k, v in class_assigment.items()}

    font = cv2.FONT_HERSHEY_SIMPLEX
    # Number of frames whose confidences are averaged into one displayed label.
    window_size = 50
    # NOTE(review): 244 looks like a typo for 224; kept as-is because it only
    # affects the size of the preview window, not the model input.
    preview_size = (244 * 2, 224 * 2)

    vid_cam = cv2.VideoCapture(0)
    if not vid_cam.isOpened():
        print("Cannot open camera")
        vid_cam.release()
        raise SystemExit(-1)

    confidence_list = []
    warmed_up = False  # becomes True once the first full window was averaged
    final_confidence = 0.0
    class_name = ""

    try:
        while True:
            ret, frame = vid_cam.read()
            if not ret:
                # Camera stopped delivering frames; the finally-block below
                # still releases the device before the process exits.
                raise SystemExit(-2)

            # Preview keeps OpenCV's native BGR order; the model gets RGB.
            frame_resized_for_lookup = cv2.resize(frame, preview_size)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # HWC uint8 ndarray -> CHW uint8 tensor on the GPU. from_numpy
            # keeps the dtype, so no float round-trip is needed.
            tensor_frame = torch.from_numpy(rgb_frame).permute(2, 0, 1).to("cuda")

            # Pure inference: no autograd graph should be built per frame.
            with torch.no_grad():
                processed_frame = mobilenet_v3_transforms(tensor_frame.unsqueeze(0))
                logits = mobilenetv3(processed_frame)
                confidences = logits.softmax(dim=-1)
            confidence_list.append(confidences.squeeze(0).cpu())

            if len(confidence_list) >= window_size:
                warmed_up = True
                # Mean confidence per class over the window, then pick the
                # class with the highest mean.
                averaged_final_confidences = torch.stack(confidence_list).mean(dim=0)
                best_score, selected_class = torch.max(averaged_final_confidences, dim=0)
                final_confidence = best_score.item()
                class_name = class_numbering[selected_class.item()]
                print(averaged_final_confidences.tolist())
                # Windows are disjoint: start collecting the next one from scratch.
                confidence_list.clear()

            if warmed_up:
                cv2.putText(
                    frame_resized_for_lookup,
                    f"{class_name}, [{round(final_confidence, 2)}]",
                    (50, 50),
                    font,
                    1,
                    (0, 0, 255),
                    3,
                )

            cv2.imshow("frame", frame_resized_for_lookup)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Always hand the camera back and close the preview window, whether
        # the loop ended via 'q', a failed read, or an unexpected exception.
        vid_cam.release()
        cv2.destroyAllWindows()

    raise SystemExit(0)