from typing import Any, Dict, List

import numpy as np
import torch
from transformers import CLIPModel, CLIPTokenizer
class PreTrainedPipeline:
    """Inference API pipeline that embeds a text string with CLIP.

    Loads the ``openai/clip-vit-base-patch32`` checkpoint once at startup
    and exposes ``__call__`` to turn a string into its text-embedding
    vector (a flat list of floats).
    """

    def __init__(self, path: str = ""):
        """Preload the CLIP model and tokenizer.

        Called exactly once, so all heavy I/O (checkpoint download/load)
        happens here rather than per request.

        Args:
            path: Path to the deployed model repository.
                NOTE(review): currently unused — the checkpoint id is
                hard-coded below; consider loading from ``path`` instead.
        """
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

    def __call__(self, inputs: str) -> List[float]:
        """Compute the CLIP text features for a single string.

        Args:
            inputs (:obj:`str`):
                a string to get the features from.
        Return:
            A :obj:`list` of floats: The features computed by the model.
        """
        token_inputs = self.tokenizer([inputs], padding=True, return_tensors="pt")
        # Inference only: no_grad skips autograd graph construction
        # (the result was being detached immediately anyway).
        with torch.no_grad():
            query_embed = self.model.get_text_features(**token_inputs)
        # Batch of one -> take row 0 and convert to a plain Python list.
        return query_embed.detach().cpu().numpy()[0].tolist()