|
|
import numpy as np |
|
|
import torch |
|
|
import cv2 |
|
|
from sympy.stats.rv import probability |
|
|
from torch.nn import Sequential, Conv2d, BatchNorm2d, Hardswish, ReLU, AdaptiveAvgPool2d, Hardsigmoid, Linear, Dropout |
|
|
from torchvision.io import read_image |
|
|
from torchvision.models import MobileNet_V3_Small_Weights, MobileNetV3 |
|
|
from torchvision.models.mobilenetv3 import InvertedResidual |
|
|
from torchvision.ops import Conv2dNormActivation, SqueezeExcitation |
|
|
|
|
|
if __name__ == "__main__":
    # Allow-list every nn.Module class contained in the pickled checkpoint so
    # a weights_only torch.load can reconstruct the full MobileNetV3 object.
    # (Collapsed from twelve separate add_safe_globals calls.)
    torch.serialization.add_safe_globals([
        MobileNetV3,
        Sequential,
        Conv2dNormActivation,
        Conv2d,
        BatchNorm2d,
        Hardswish,
        InvertedResidual,
        ReLU,
        SqueezeExcitation,
        AdaptiveAvgPool2d,
        Hardsigmoid,
        Linear,
        Dropout,
    ])

    # NOTE(review): hard-coded absolute checkpoint path — consider a CLI arg.
    mobilenetv3 = torch.load(
        "/home/jarric/PycharmProjects/OrangeRecognizer/outputs/2025-04-21 12:08:02.020215/model_99_finetuned.pt"
    )
    mobilenetv3 = mobilenetv3.to("cuda")
    # Fix: switch to inference mode so BatchNorm uses its running statistics
    # and Dropout is disabled; the original ran the model in whatever
    # train/eval mode happened to be pickled in the checkpoint.
    mobilenetv3.eval()

    # Preprocessing pipeline matching the ImageNet weights the model was
    # fine-tuned from.
    mobilenet_v3_transforms = MobileNet_V3_Small_Weights.IMAGENET1K_V1.transforms().to("cuda")

    # Label <-> index mapping used during training, and its inverse for display.
    class_assigment = {'citrus canker': 0, 'healthy': 1, 'melanose': 2}
    class_numbering = {v: k for k, v in class_assigment.items()}

    font = cv2.FONT_HERSHEY_SIMPLEX

    vid_cam = cv2.VideoCapture(0)
    confidence_list = []  # per-frame softmax vectors awaiting averaging
    if not vid_cam.isOpened():
        print("Cannot open camera")
        exit(-1)

    warmed_up = False      # True once the first 50-frame window is averaged
    final_confidence = 0   # confidence of the currently displayed class
    class_name = ""        # currently displayed class label

    # Fix: try/finally guarantees the camera handle is released on every exit
    # path ('q' break, failed read, Ctrl-C); the original leaked it.
    try:
        while True:
            ret, frame = vid_cam.read()
            if not ret:
                exit(-2)

            orig_frame = frame.copy()
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): 244*2 looks like a typo for 224*2; kept as-is
            # since this frame is only used for on-screen display.
            frame_resized_for_lookup = cv2.resize(orig_frame, (244 * 2, 224 * 2))

            # HWC uint8 -> CHW uint8 on the GPU, as the transforms expect.
            tensor_frame = torch.Tensor(frame).permute((2, 0, 1)).to(torch.uint8).to("cuda")

            # Fix: inference without building an autograd graph (the original
            # accumulated gradient bookkeeping on every frame).
            with torch.no_grad():
                processed_frame = mobilenet_v3_transforms(tensor_frame.unsqueeze(0))
                logits = mobilenetv3(processed_frame)
            confidences = logits.softmax(dim=-1)
            confidence_list.append(confidences.cpu().squeeze(0))

            # Every 50 frames: average the collected per-class confidences and
            # select the most likely class for the on-screen overlay.
            if len(confidence_list) >= 50:
                warmed_up = True
                averaged = torch.stack(confidence_list).mean(dim=0)
                final_confidence, selected_class = torch.max(averaged.unsqueeze(0), 1)
                class_name = class_numbering[selected_class.item()]
                print([v.item() for v in averaged])
                confidence_list.clear()

            if warmed_up:
                cv2.putText(
                    frame_resized_for_lookup,
                    f"{class_name}, [{round(final_confidence.detach().cpu().item(), 2)}]",
                    (50, 50), font, 1, (0, 0, 255), 3,
                )
            cv2.imshow("frame", frame_resized_for_lookup)

            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        vid_cam.release()
        cv2.destroyAllWindows()

    exit(0)