panda1835
/

Shelly

Object Detection

English

animal

sea-turtle

Model card Files Files and versions

xet

Community

panda1835 committed on Jul 20, 2023

Commit

adbd0a7

1 Parent(s): e7dec10

Update handler.py

Browse files

Files changed (1) hide show

handler.py +46 -3

handler.py CHANGED Viewed

@@ -1,13 +1,46 @@
 from typing import Dict, List, Any
 from ultralytics import YOLO
 import os
 class EndpointHandler():
     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
-        self.model = YOLO(os.path.join(path, 'yolov8_2023-07-19_yolov8m.pt'))
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         data args:
@@ -41,5 +74,15 @@ class EndpointHandler():
             y1 = max(y1 - offset, 0)
             y2 = min(y2 + offset, H)
         new_image = img[y1:y2, x1:x2]
-        # Return the annotated original image with the square cropped
-        return annotated.tolist(), new_image.tolist()

 from typing import Dict, List, Any
 from ultralytics import YOLO
 import os
+import torch
+import torch.nn as nn
+import torchvision.transforms as T
+from PIL import Image
class LinearClassifier(torch.nn.Module):
    """Single fully connected layer mapping a feature vector to class scores.

    The weight matrix is initialised from a normal distribution with mean 0
    and standard deviation 0.01; the bias is initialised to zero.  The
    forward pass returns the raw output of the linear layer (no activation
    is applied here).

    Args:
        input_dim: Size of each input feature vector (default 384).
        output_dim: Number of output scores per sample (default 7).
    """

    def __init__(self, input_dim=384, output_dim=7):
        super().__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)
        # Use torch.nn.init instead of mutating `.data` directly: the init
        # helpers run under no_grad and are the supported way to reset
        # parameters in place.
        torch.nn.init.normal_(self.linear.weight, mean=0.0, std=0.01)
        torch.nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        """Return the linear layer's output for input batch ``x``."""
        return self.linear(x)
 class EndpointHandler():
     def __init__(self, path=""):
         """Preload every model and resource needed at inference time.

         Loads the DINOv2 ViT-S/14 backbone from torch.hub, the YOLOv8
         detector and the linear classifier checkpoint from ``path``, builds
         the image preprocessing pipeline, and reads the class labels.

         Args:
             path: Directory containing ``yolov8_2023-07-19_yolov8m.pt``,
                 ``linear_2023-07-18_v0.2.pt`` and ``labels.txt``.
         """
         # Keep the device on the instance so code outside __init__ can move
         # inputs to the same device as the backbone.
         device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
         self.device = device

         self.dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
         self.dinov2_vits14.to(device)
         # Inference only: make sure the backbone is not left in training mode.
         self.dinov2_vits14.eval()
         print('Successfully load dinov2_vits14 model')

         self.yolov8_model = YOLO(os.path.join(path, 'yolov8_2023-07-19_yolov8m.pt'))

         self.linear_model = LinearClassifier()
         # map_location lets a checkpoint saved on a GPU load on a CPU-only
         # host; the classifier itself stays on the CPU, as before.
         linear_state = torch.load(
             os.path.join(path, 'linear_2023-07-18_v0.2.pt'),
             map_location='cpu',
         )
         self.linear_model.load_state_dict(linear_state)
         self.linear_model.eval()

         # NOTE(review): Resize(244) followed by CenterCrop(224) is unusual;
         # possibly a typo for 224 or 256 - confirm against the training setup
         # before changing it.
         self.transform_image = T.Compose([
             T.ToTensor(),
             T.Resize(244),
             T.CenterCrop(224),
             T.Normalize([0.5], [0.5])
         ])

         # labels.txt is a comma-separated list: loggerhead,green,leatherback...
         # Strip whitespace so a trailing newline does not end up inside the
         # last label, and drop empty entries left by a trailing comma.
         with open(os.path.join(path, 'labels.txt'), 'r', encoding='utf-8') as f:
             self.labels = [
                 label.strip() for label in f.read().split(',') if label.strip()
             ]
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         data args:
             y1 = max(y1 - offset, 0)
             y2 = min(y2 + offset, H)
         new_image = img[y1:y2, x1:x2]
+        new_image = self.transform_image(Image.fromarray(cropped))[:3].unsqueeze(0)
+        embedding = self.dinov2_vits14(new_image.to(device))
+        prediction = self.linear_model(embedding)
+        percentage = nn.Softmax(dim=1)(prediction).detach().numpy().round(2)[0].tolist()
+        result = {}
+        for i in range(len(self.labels)):
+            result[name_en2vi[self.labels[i]]] = percentage[i]
+        # Return the annotated original image with the square cropped and result dict
+        return annotated.tolist(), result