OrangeRecognizer / read_cam.py
jarric's picture
Initial upload
e81e6d0 verified
import numpy as np
import torch
import cv2
from sympy.stats.rv import probability
from torch.nn import Sequential, Conv2d, BatchNorm2d, Hardswish, ReLU, AdaptiveAvgPool2d, Hardsigmoid, Linear, Dropout
from torchvision.io import read_image
from torchvision.models import MobileNet_V3_Small_Weights, MobileNetV3
from torchvision.models.mobilenetv3 import InvertedResidual
from torchvision.ops import Conv2dNormActivation, SqueezeExcitation
# Fine-tuned checkpoint; the file holds a whole pickled model object (it is
# moved to a device and called directly), not just a state dict.
MODEL_PATH = "/home/jarric/PycharmProjects/OrangeRecognizer/outputs/2025-04-21 12:08:02.020215/model_99_finetuned.pt"

# Mapping from class label to the model's output index.
CLASS_ASSIGNMENT = {'citrus canker': 0, 'healthy': 1, 'melanose': 2}

# Number of frames whose confidences are averaged before the label is refreshed.
WINDOW_SIZE = 50

# (width, height) of the preview window.
# NOTE(review): 244 looks like a typo for 224; kept as-is because it only
# affects the on-screen preview, not the classifier input.
DISPLAY_SIZE = (244 * 2, 224 * 2)


def summarize_window(
    confidence_window: list[torch.Tensor],
    class_ids: list[int],
) -> tuple[int, float, list[float]]:
    """Average per-frame confidences and pick the most likely class.

    Args:
        confidence_window: One 1-D confidence vector per frame; all vectors
            must have the same length.
        class_ids: Output indices to consider, in reporting order.

    Returns:
        A tuple ``(best_class_id, best_confidence, per_class_means)`` where
        ``per_class_means`` is ordered like ``class_ids``.

    Raises:
        ValueError: If ``confidence_window`` is empty.
    """
    if not confidence_window:
        raise ValueError("confidence_window must contain at least one frame")

    # Stack once and reduce over the frame axis instead of rebuilding an
    # array for every class.
    mean_per_output = torch.stack(list(confidence_window)).mean(dim=0)
    per_class_means = [mean_per_output[class_id].item() for class_id in class_ids]
    best_position = max(range(len(per_class_means)), key=per_class_means.__getitem__)
    return class_ids[best_position], per_class_means[best_position], per_class_means


def load_classifier(device: torch.device) -> torch.nn.Module:
    """Load the fine-tuned model from ``MODEL_PATH`` ready for inference."""
    # Allow-list the module classes stored in the pickled checkpoint so that
    # torch.load's restricted unpickler accepts them.
    torch.serialization.add_safe_globals([
        MobileNetV3,
        Sequential,
        Conv2dNormActivation,
        Conv2d,
        BatchNorm2d,
        Hardswish,
        InvertedResidual,
        ReLU,
        SqueezeExcitation,
        AdaptiveAvgPool2d,
        Hardsigmoid,
        Linear,
        Dropout,
    ])
    model = torch.load(MODEL_PATH, map_location=device)
    model = model.to(device)
    # A pickled module keeps its training flag; force inference behaviour so
    # Dropout is disabled and BatchNorm uses its running statistics.
    model.eval()
    return model


def main() -> int:
    """Classify webcam frames and overlay the window-averaged prediction.

    Returns:
        Process exit code: 0 after the user quits with ``q``, -1 if the
        camera cannot be opened, -2 if a frame cannot be read.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_classifier(device)
    preprocess = MobileNet_V3_Small_Weights.IMAGENET1K_V1.transforms().to(device)

    class_numbering = {index: label for label, index in CLASS_ASSIGNMENT.items()}
    class_ids = list(class_numbering)
    font = cv2.FONT_HERSHEY_SIMPLEX

    vid_cam = cv2.VideoCapture(0)
    try:
        if not vid_cam.isOpened():
            print("Cannot open camera")
            return -1

        confidence_window = []
        overlay_text = None  # stays None until the first full window is averaged

        while True:
            ret, frame = vid_cam.read()
            if not ret:
                print("Cannot read frame from camera")
                return -2

            # OpenCV delivers BGR; the classifier preprocessing expects RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            preview = cv2.resize(frame, DISPLAY_SIZE)

            # HWC uint8 array -> CHW uint8 tensor, without a float round trip.
            tensor_frame = torch.from_numpy(rgb_frame).permute(2, 0, 1).to(device)

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                logits = model(preprocess(tensor_frame.unsqueeze(0)))
                confidences = logits.softmax(dim=-1)
            confidence_window.append(confidences.cpu().squeeze(0))

            if len(confidence_window) >= WINDOW_SIZE:
                best_class, best_confidence, averages = summarize_window(
                    confidence_window, class_ids
                )
                overlay_text = f"{class_numbering[best_class]}, [{round(best_confidence, 2)}]"
                print(averages)
                confidence_window.clear()

            if overlay_text is not None:
                cv2.putText(preview, overlay_text, (50, 50), font, 1, (0, 0, 255), 3)

            cv2.imshow("frame", preview)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Always give the camera and the preview window back, including on
        # the early-return error paths.
        vid_cam.release()
        cv2.destroyAllWindows()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())