File size: 2,100 Bytes
036e7c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import torch
import torchvision
import cv2
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

from .utils import to_rgb


def _sigmoid_head(num_features):
    """Return the ``Linear -> Sigmoid`` head used to replace a backbone's last layer.

    The head maps ``num_features`` inputs to ``NUM_CLASSES`` outputs and
    squashes each output independently with a sigmoid.
    """
    return nn.Sequential(
        nn.Linear(num_features, NUM_CLASSES),
        nn.Sigmoid(),
    )


def create_effNetv2s(weights='IMAGENET1K_V1'):
    """Build an EfficientNetV2-S whose final classifier layer is a sigmoid head.

    Args:
        weights: Forwarded to ``torchvision.models.efficientnet_v2_s``.
            The default keeps the original behaviour (ImageNet weights);
            pass ``None`` to skip loading pretrained weights, e.g. when a
            full checkpoint is loaded right afterwards.

    Returns:
        The model with ``classifier[1]`` replaced by ``Linear -> Sigmoid``
        producing ``NUM_CLASSES`` outputs.
    """
    model = torchvision.models.efficientnet_v2_s(weights=weights)
    num_features = model.classifier[1].in_features
    model.classifier[1] = _sigmoid_head(num_features)
    return model


def create_convnet(weights='IMAGENET1K_V1'):
    """Build a ConvNeXt-Base whose final classifier layer is a sigmoid head.

    Args:
        weights: Forwarded to ``torchvision.models.convnext_base``.
            The default keeps the original behaviour (ImageNet weights);
            pass ``None`` to skip loading pretrained weights.

    Returns:
        The model with ``classifier[2]`` replaced by ``Linear -> Sigmoid``
        producing ``NUM_CLASSES`` outputs.
    """
    model = torchvision.models.convnext_base(weights=weights)
    num_features = model.classifier[2].in_features
    model.classifier[2] = _sigmoid_head(num_features)
    return model


def create_model(model_name):
    """Instantiate a registered model, load its checkpoint and move it to ``DEVICE``.

    Args:
        model_name: Key into the ``_MODEL`` / ``_WEIGHT`` registries.

    Returns:
        The model with the checkpoint from ``_WEIGHT[model_name]`` loaded,
        placed on ``DEVICE``.

    Raises:
        KeyError: If ``model_name`` is not a registered model.
    """
    if model_name not in _MODEL:
        raise KeyError(
            f"unknown model_name {model_name!r}; expected one of {sorted(_MODEL)}"
        )

    # Build the architecture without the ImageNet weights: the strict
    # load_state_dict() below replaces every parameter and buffer anyway, so
    # fetching the pretrained checkpoint first is wasted work and makes
    # construction fail on machines without network access or a warm cache.
    model = _MODEL[model_name](weights=None)

    # NOTE: torch.load unpickles the file; only point _WEIGHT at trusted
    # checkpoints. Tensors are mapped to CPU first so a checkpoint saved on
    # a GPU machine still loads on a CPU-only host.
    state_dict = torch.load(_WEIGHT[model_name], map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    return model

def create_transform():
    """Return the preprocessing pipeline applied to each image before inference.

    The pipeline resizes to ``(HEIGHT, WEIGHT)``, converts the image to a
    tensor, then normalises all three channels with the same mean and
    standard deviation.
    """
    # Same statistics for every channel.
    # NOTE(review): presumably computed on the training set -- confirm.
    channel_mean = (0.6078, 0.6078, 0.6078)
    channel_std = (0.1932, 0.1932, 0.1932)

    steps = [
        transforms.Resize((HEIGHT, WEIGHT)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    return transforms.Compose(steps)


# Factory registry: model name -> zero-argument callable that builds the network.
_MODEL = {
    "effNetv2s": create_effNetv2s,
    "convnet": create_convnet,
}

# Checkpoint registry: model name -> path of the state dict to load.
# The paths are relative, so they resolve against the current working directory.
_WEIGHT = {
    "effNetv2s": './classification/weights/effnetv2s.pt',
    "convnet": './classification/weights/convnet.pt',
}

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Size passed to transforms.Resize as (HEIGHT, WEIGHT).
# NOTE(review): WEIGHT looks like a misspelling of WIDTH (and is easy to
# confuse with _WEIGHT above); the name is kept because other code reads it.
HEIGHT = WEIGHT = 224

# Number of outputs produced by the replacement classifier heads.
NUM_CLASSES = 44


class Classifier():
    """Wrap a checkpointed model and its preprocessing for single-image inference."""

    def __init__(self, model_name="effNetv2s"):
        """Load the model registered under ``model_name`` and build the transform."""
        self.model = create_model(model_name)
        self.transform = create_transform()

    def predict(self, image):
        """Run the model on one image and return its output as a NumPy array.

        Args:
            image: cv2 image. It is passed through ``to_rgb`` before being
                wrapped in a PIL image (presumably a BGR -> RGB channel
                swap -- confirm in ``utils``).

        Returns:
            The multi-label probability vector as a NumPy array with a
            leading batch dimension of size 1.
        """
        rgb_array = to_rgb(image)
        pil_image = Image.fromarray(rgb_array)
        tensor = self.transform(pil_image)

        # Switch to evaluation mode on every call so inference-time layer
        # behaviour is used even if the model was put in train mode elsewhere.
        self.model.eval()
        with torch.no_grad():
            batch = tensor.unsqueeze(0).to(DEVICE)  # add the batch dimension
            scores = self.model(batch)
        return scores.cpu().numpy()