File size: 1,870 Bytes
562df22 286858d 562df22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import os
import torch
import clip
from utils import MLP, normalized
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class EndpointHandler:
    """Inference handler that assigns an aesthetic score to a single image.

    The image is encoded with the CLIP ``ViT-L/14`` model, the embedding is
    passed through the project's ``normalized`` helper, and the result is fed
    to an ``MLP`` head whose weights are loaded from
    ``sac+logos+ava1-l14-linearMSE.pth``.
    """

    def __init__(self, path=""):
        """Load the MLP head and the CLIP encoder onto the module-level ``device``.

        Args:
            path: Directory that contains ``sac+logos+ava1-l14-linearMSE.pth``.
        """
        # 768 is the MLP input size; presumably it matches the CLIP ViT-L/14
        # embedding width -- confirm against ``utils.MLP``.
        classifier = MLP(768)
        # NOTE(review): torch.load unpickles the checkpoint. Only load files
        # you trust; consider ``weights_only=True`` if the installed torch
        # version supports it.
        state_dict = torch.load(
            os.path.join(path, "sac+logos+ava1-l14-linearMSE.pth"),
            map_location=device,
        )
        classifier.load_state_dict(state_dict)
        classifier.to(device)
        classifier.eval()

        clip_model, preprocess = clip.load("ViT-L/14", device=device)

        self.model_dict = {
            "classifier": classifier,
            "clip_model": clip_model,
            "clip_preprocess": preprocess,
            "device": device,
        }

    def __call__(self, data):
        """Score one image.

        Args:
            data: Request payload. The image is taken from ``data["inputs"]``
                (the key is removed from ``data``); when that key is absent,
                ``data`` itself is handed to the CLIP preprocessing transform.

        Returns:
            dict: ``{"aesthetic score": <float>}``.
        """
        target_device = self.model_dict["device"]

        # extract converted PIL image from serialized request
        image = data.pop("inputs", data)
        image_input = (
            self.model_dict["clip_preprocess"](image).unsqueeze(0).to(target_device)
        )

        # Pure inference: keep every forward pass (CLIP and the MLP head) out
        # of autograd so no graph is built.
        with torch.no_grad():
            image_features = self.model_dict["clip_model"].encode_image(image_input)
            # ``.cpu()`` is a no-op for tensors already on the CPU, so a single
            # code path serves both CUDA and CPU.
            im_emb_arr = normalized(image_features.detach().cpu().numpy())
            # ``.float()`` gives float32 on whichever device the tensor lives,
            # replacing the legacy ``.type(torch.cuda.FloatTensor)`` casts.
            im_emb = torch.from_numpy(im_emb_arr).to(target_device).float()
            prediction = self.model_dict["classifier"](im_emb)

        return {"aesthetic score": prediction.item()}
|