Chiraag Anand committed

Commit · 263e7a9
1 Parent(s): 8aab37c

init
Browse files

- Q16Model.py +62 -0
- config.json +31 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- preprocessor_config.json +28 -0
- prompts.p +0 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +31 -0
- vocab.json +0 -0
Q16Model.py
ADDED
@@ -0,0 +1,62 @@
from transformers import CLIPModel, PreTrainedModel, CLIPProcessor, AutoConfig
import torch
import pickle
from torch.nn.functional import cosine_similarity

CLIP_MODEL = "openai/clip-vit-large-patch14"


class Q16Model(PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.clip_model = CLIPModel.from_pretrained(CLIP_MODEL)
        self.soft_prompts = None

    def load_soft_prompts(self, path):
        self.soft_prompts = torch.HalfTensor(pickle.load(
            open(path, 'rb'))).to('cpu').to(torch.float32)

    def forward(self, pixel_values):
        # Get image encodings from CLIP model
        image_features = self.clip_model.get_image_features(
            pixel_values=pixel_values)

        # Compare image features with soft prompts
        similarities = cosine_similarity(image_features.unsqueeze(
            1), self.soft_prompts.unsqueeze(0), dim=-1)
        logits = similarities

        return logits

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        config = kwargs.pop("config", None)
        model = super(Q16Model, cls).from_pretrained(
            pretrained_model_name_or_path, config=config, *model_args, **kwargs)
        # Load the soft prompts
        model.load_soft_prompts(f"{pretrained_model_name_or_path}/prompts.p")
        return model

    def save_pretrained(self, save_directory):
        super().save_pretrained(save_directory)
        # Save the soft prompts separately
        with open(f"{save_directory}/prompts.p", 'wb') as f:
            pickle.dump(self.soft_prompts.cpu().numpy(), f)


if __name__ == "__main__":
    # Define the configuration
    config = AutoConfig.from_pretrained(CLIP_MODEL)
    config.soft_prompt_dim = 768

    # Initialize the custom model
    model = Q16Model(config)

    # Load the soft prompts
    model.load_soft_prompts("./prompts.p")

    # Save the model and processor
    save_directory = "."
    model.save_pretrained(save_directory)
    processor = CLIPProcessor.from_pretrained(CLIP_MODEL)
    processor.save_pretrained(save_directory)
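For context, a minimal inference sketch (not part of this commit) that loads the files added here and scores one image against the soft prompts. The config is passed explicitly because Q16Model sets no config_class; the image path "example.jpg" and the argmax reading of the output are illustrative assumptions.

# Hypothetical usage sketch -- not part of this commit.
import torch
from PIL import Image
from transformers import AutoConfig, CLIPProcessor
from Q16Model import Q16Model

repo_dir = "."  # directory holding the files added above
config = AutoConfig.from_pretrained(repo_dir)
model = Q16Model.from_pretrained(repo_dir, config=config)  # also loads prompts.p
processor = CLIPProcessor.from_pretrained(repo_dir)

inputs = processor(images=Image.open("example.jpg"), return_tensors="pt")
with torch.no_grad():
    logits = model(pixel_values=inputs["pixel_values"])  # shape (1, num_prompts)
# Each column is the cosine similarity to one soft prompt; which index
# corresponds to which class depends on how prompts.p was trained.
prediction = logits.argmax(dim=-1)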
config.json
ADDED
@@ -0,0 +1,31 @@
{
  "_name_or_path": "openai/clip-vit-large-patch14",
  "architectures": [
    "Q16Model"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 768,
  "soft_prompt_dim": 768,
  "text_config": {
    "dropout": 0.0,
    "hidden_size": 768,
    "intermediate_size": 3072,
    "model_type": "clip_text_model",
    "num_attention_heads": 12,
    "projection_dim": 768
  },
  "torch_dtype": "float32",
  "transformers_version": "4.42.4",
  "vision_config": {
    "dropout": 0.0,
    "hidden_size": 1024,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768
  }
}
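"soft_prompt_dim" is a custom key written by the __main__ block in Q16Model.py; transformers preserves unknown config keys as plain attributes, so it survives a load round-trip. A small hedged check, assuming the files above sit in the current directory:

from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")
# The custom key matches CLIP's joint embedding size, so the soft
# prompts are directly comparable to get_image_features output.
assert config.soft_prompt_dim == config.projection_dim == 768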
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e95556d2b3766f31a43fb26efb887846ccd6cfcce8234f1a0868d62647d7492
size 1710544204
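At 1,710,544,204 bytes, the LFS-tracked blob is consistent with the full float32 CLIP ViT-L/14 weights (roughly 427.6 M parameters × 4 bytes ≈ 1.71 GB): Q16Model serializes the entire wrapped CLIPModel, not just the soft prompts.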
preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
{
  "crop_size": {
    "height": 224,
    "width": 224
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "CLIPImageProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "processor_class": "CLIPProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 224
  }
}
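This is the stock CLIP preprocessing pipeline: resize the shortest edge to 224, center-crop to 224×224, rescale by 1/255, then normalize with the CLIP mean/std. A quick sketch (the image path is an assumption):

from PIL import Image
from transformers import CLIPImageProcessor

image_processor = CLIPImageProcessor.from_pretrained(".")  # reads this file
batch = image_processor(Image.open("example.jpg"), return_tensors="pt")
print(batch["pixel_values"].shape)  # torch.Size([1, 3, 224, 224])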
prompts.p
ADDED
Binary file (6.3 kB).
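The pickle is small enough to reason about: at 6.3 kB it plausibly holds two 768-dimensional float32 prompt embeddings (2 × 768 × 4 bytes ≈ 6.1 kB plus pickle overhead), i.e. one per class, but that is only an inference from the file size; the shape is best checked directly:

import pickle

# Hypothetical inspection -- not part of this commit.
with open("prompts.p", "rb") as f:
    prompts = pickle.load(f)
print(type(prompts), getattr(prompts, "shape", None))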
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "processor_class": "CLIPProcessor",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}
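These are the stock CLIP tokenizer settings written by CLIPProcessor.save_pretrained in Q16Model.py. Q16Model's forward pass only consumes pixel_values, so the text side goes unused at inference, but the tokenizer still loads normally. A sketch:

from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained(".")
ids = tokenizer("an example caption", padding="max_length", truncation=True).input_ids
print(len(ids))  # 77, the model_max_length above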
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.