from transformers import CLIPModel, CLIPConfig, PreTrainedModel, CLIPProcessor, AutoConfig
import torch
import pickle
from torch.nn.functional import cosine_similarity

# CLIP backbone whose image encoder produces the features the learned
# soft prompts are compared against (768-dim for ViT-L/14).
CLIP_MODEL = "openai/clip-vit-large-patch14"

|
class Q16Model(PreTrainedModel):
    """CLIP image encoder plus learned soft prompts, one per class; an image
    is scored by its cosine similarity to each prompt."""

    # PreTrainedModel.from_pretrained needs a config class to rebuild the
    # saved config.json when loading from a directory.
    config_class = CLIPConfig

    def __init__(self, config):
        super().__init__(config)
        # Backbone weights start from the public checkpoint; when loading via
        # from_pretrained they are overwritten by the saved state dict.
        self.clip_model = CLIPModel.from_pretrained(CLIP_MODEL)
        self.soft_prompts = None

    def load_soft_prompts(self, path):
        # Prompts are pickled in half precision; cast to float32 so they
        # match the image features in forward().
        with open(path, 'rb') as f:
            self.soft_prompts = torch.HalfTensor(pickle.load(f)).float()

    def forward(self, pixel_values):
        image_features = self.clip_model.get_image_features(pixel_values=pixel_values)
        # Cosine similarity of each image feature (batch, dim) against each
        # soft prompt (num_prompts, dim) -> logits of shape (batch, num_prompts).
        similarities = cosine_similarity(
            image_features.unsqueeze(1), self.soft_prompts.unsqueeze(0), dim=-1)
        return similarities

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        config = kwargs.pop("config", None)
        model = super(Q16Model, cls).from_pretrained(
            pretrained_model_name_or_path, *model_args, config=config, **kwargs)
        # The prompts live next to the weights; this assumes a local
        # directory rather than a Hub model id.
        model.load_soft_prompts(f"{pretrained_model_name_or_path}/prompts.p")
        return model

    def save_pretrained(self, save_directory, **kwargs):
        super().save_pretrained(save_directory, **kwargs)
        with open(f"{save_directory}/prompts.p", 'wb') as f:
            pickle.dump(self.soft_prompts.cpu().numpy(), f)
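
# load_soft_prompts expects a pickled array-like of shape (num_classes, dim)
# in CLIP's image-feature space (768 for ViT-L/14); the Q16 setup uses two
# classes. A minimal sketch for producing such a file with placeholder
# values (real prompts are learned, not random):
#
#   import numpy as np
#   dummy_prompts = np.random.randn(2, 768).astype(np.float16)
#   with open("./prompts.p", "wb") as f:
#       pickle.dump(dummy_prompts, f)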
|
if __name__ == "__main__":
    # Export a ready-to-load checkpoint: config, backbone weights, soft
    # prompts, and the matching processor.
    config = AutoConfig.from_pretrained(CLIP_MODEL)
    config.soft_prompt_dim = 768  # record the soft-prompt embedding dimension

    model = Q16Model(config)
    model.load_soft_prompts("./prompts.p")

    save_directory = "."
    model.save_pretrained(save_directory)

    processor = CLIPProcessor.from_pretrained(CLIP_MODEL)
    processor.save_pretrained(save_directory)
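
    # Inference sketch (illustrative; "example.jpg" is a placeholder path):
    # reload the exported model and score an image against the soft prompts,
    # taking the argmax as the predicted class.
    #
    #   from PIL import Image
    #   loaded = Q16Model.from_pretrained(save_directory)
    #   inputs = processor(images=Image.open("example.jpg"), return_tensors="pt")
    #   with torch.no_grad():
    #       logits = loaded(pixel_values=inputs["pixel_values"])
    #   predicted = logits.argmax(dim=-1)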
|
|
|