# File size: 1,870 Bytes

import os
import torch
import clip
from utils import MLP, normalized

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class EndpointHandler:
    """Score an image with a CLIP ViT-L/14 encoder followed by an MLP head.

    All loaded components are kept in ``self.model_dict`` under the keys
    ``"classifier"``, ``"clip_model"``, ``"clip_preprocess"`` and
    ``"device"``.
    """

    def __init__(self, path: str = "") -> None:
        """Load the MLP head and the CLIP model onto the module-level device.

        Args:
            path: Directory containing ``sac+logos+ava1-l14-linearMSE.pth``.
        """
        # presumably 768 is the CLIP ViT-L/14 embedding width -- confirm in utils.MLP
        classifier = MLP(768)

        # NOTE: torch.load unpickles the file, so the checkpoint must come
        # from a trusted source.
        state_dict = torch.load(
            os.path.join(path, "sac+logos+ava1-l14-linearMSE.pth"),
            map_location=device,
        )
        classifier.load_state_dict(state_dict)
        classifier.to(device)
        classifier.eval()

        clip_model, preprocess = clip.load("ViT-L/14", device=device)

        self.model_dict = {
            "classifier": classifier,
            "clip_model": clip_model,
            "clip_preprocess": preprocess,
            "device": device,
        }

    def __call__(self, data: dict) -> dict:
        """Compute the score for one image.

        Args:
            data: Request payload. The image is read from (and removed from)
                the ``"inputs"`` key; it is expected to be a ``PIL.Image``.
                If the key is absent, ``data`` itself is used as the image.

        Returns:
            A dict of the form ``{"aesthetic score": <float>}``.
        """
        # extract converted PIL image from serialized request
        image = data.pop("inputs", data)

        target = self.model_dict["device"]
        image_input = self.model_dict["clip_preprocess"](image).unsqueeze(0).to(target)
        with torch.no_grad():
            image_features = self.model_dict["clip_model"].encode_image(image_input)
            # .cpu() returns the same tensor when it already lives on the CPU,
            # so a single code path serves both CUDA and CPU.
            im_emb_arr = normalized(image_features.detach().cpu().numpy())
            # .float() casts to float32 on whichever device the tensor is on,
            # replacing the legacy torch.FloatTensor / torch.cuda.FloatTensor casts.
            im_emb = torch.from_numpy(im_emb_arr).to(target).float()

            prediction = self.model_dict["classifier"](im_emb)

        return {"aesthetic score": prediction.item()}