Upload sd_token_similarity_calculator.ipynb
sd_token_similarity_calculator.ipynb
CHANGED
@@ -46,7 +46,8 @@
 "NUM_PREFIX = 13662\n",
 "NUM_SUFFIX = 32901\n",
 "\n",
-"
+"PREFIX_ENC_VOCAB = 'encoded_prefix_to_girl'\n",
+"SUFFIX_ENC_VOCAB = 'encoded_suffix'\n",
 "\n",
 "#Import the vocab.json\n",
 "import json\n",
@@ -117,6 +118,22 @@
 "  return ' ' #<---- return whitespace if other id like emojis etc.\n",
 "#--------#\n",
 "\n",
+"#get token from id (excluding tokens with special symbols)\n",
+"def get_suffix(id):\n",
+"  _id = f'{id}'\n",
+"  if int(id) <= NUM_SUFFIX:\n",
+"    return suffix[_id]\n",
+"  return ' ' #<---- return whitespace if out of bounds\n",
+"#--------#\n",
+"\n",
+"#get token from id (excluding tokens with special symbols)\n",
+"def get_prefix(id):\n",
+"  _id = f'{id}'\n",
+"  if int(id) <= NUM_PREFIX:\n",
+"    return prefix[_id]\n",
+"  return ' ' #<---- return whitespace if out of bounds\n",
+"#--------#\n",
+"\n",
 "#print(get_token(35894))\n"
 ],
 "metadata": {
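Unescaped from the notebook JSON, the two helpers added in this hunk are plain bounds-checked dictionary lookups. A minimal sketch, assuming `prefix` and `suffix` are the id-keyed dicts loaded from the vocab .json files earlier in the notebook:

    # Sketch of the added helpers, outside the JSON escaping.
    # Assumes `prefix`/`suffix` map stringified ids to token names.
    def get_suffix(id):
        _id = f'{id}'
        if int(id) <= NUM_SUFFIX:
            return suffix[_id]
        return ' '  # whitespace if out of bounds

    def get_prefix(id):
        _id = f'{id}'
        if int(id) <= NUM_PREFIX:
            return prefix[_id]
        return ' '  # whitespace if out of bounds
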
@@ -135,7 +152,7 @@
 "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
 "\n",
 "# @markdown Write name of token to match against\n",
-"token_name = \"
+"token_name = \"prs \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
 "\n",
 "prompt = token_name\n",
 "# @markdown (optional) Mix the token with something else\n",
@@ -308,7 +325,7 @@
 "#Get image\n",
 "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
 "image_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
-"colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
+"colab_image_path = \"imperial.png\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
 "# @markdown --------------------------\n",
 "\n",
 "image_path = \"\"\n",
@@ -332,6 +349,8 @@
 "else:\n",
 "  image_A = Image.open(requests.get(image_url, stream=True).raw)\n",
 "#------#\n",
+"from google.colab.patches import cv2_imshow\n",
+"cv2_imshow(image_A)\n",
 "\n"
 ],
 "metadata": {
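One caveat with the preview added here: `cv2_imshow` renders NumPy arrays in BGR channel order, while `image_A` is a PIL image, so the call may need a conversion first. A hedged sketch of that fix (not part of the commit):

    # Sketch: convert the PIL image to a BGR NumPy array for cv2_imshow.
    import numpy as np
    from google.colab.patches import cv2_imshow

    image_bgr = np.array(image_A.convert('RGB'))[:, :, ::-1]  # RGB -> BGR
    cv2_imshow(image_bgr)
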
@@ -340,6 +359,89 @@
 "execution_count": null,
 "outputs": []
 },
+{
+"cell_type": "code",
+"source": [
+"# @title Order pre-made text_encodings to image similarity\n",
+"from transformers import AutoTokenizer\n",
+"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+"from transformers import CLIPProcessor, CLIPModel\n",
+"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+"model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+"\n",
+"# Get image features\n",
+"inputs = processor(images=image_A, return_tensors=\"pt\")\n",
+"image_features = model.get_image_features(**inputs)\n",
+"image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
+"name_A = \"the image\"\n",
+"\n",
+"# Load the .db file for prefix encodings\n",
+"import shelve\n",
+"d = shelve.open(PREFIX_ENC_VOCAB)\n",
+"dots = results_sim = torch.zeros(NUM_PREFIX)\n",
+"for index in range(NUM_PREFIX):\n",
+"  text_features = d[f'{index}']\n",
+"  logit_scale = model.logit_scale.exp()\n",
+"  torch.matmul(text_features, image_features.t()) * logit_scale\n",
+"  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+"  dots[index] = sim\n",
+"#----#\n",
+"prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+"d.close() #close the file\n",
+"\n",
+"# Load the .db file for suffix encodings\n",
+"import shelve\n",
+"d = shelve.open(SUFFIX_ENC_VOCAB)\n",
+"dots = results_sim = torch.zeros(NUM_SUFFIX)\n",
+"for index in range(NUM_SUFFIX):\n",
+"  text_features = d[f'{index}']\n",
+"  logit_scale = model.logit_scale.exp()\n",
+"  torch.matmul(text_features, image_features.t()) * logit_scale\n",
+"  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+"  dots[index] = sim\n",
+"#----#\n",
+"suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+"d.close() #close the file"
+],
+"metadata": {
+"id": "gaOB8rsOneIa"
+},
+"execution_count": null,
+"outputs": []
+},
+{
+"cell_type": "code",
+"source": [
+"# @title Show the 10 most similiar suffix and prefix text-encodings to the image encoding\n",
+"\n",
+"_suffixes = '{'\n",
+"for index in range(20):\n",
+"  id = f'{suffix_indices[index]}'\n",
+"  sim = suffix_sorted[index]\n",
+"  name = get_suffix(id)\n",
+"  _suffixes = _suffixes + name + '|'\n",
+"#------#\n",
+"_suffixes = (_suffixes + '}').replace('|}', '}')\n",
+"print('most similiar suffix tokens to image : ' + _suffixes)\n",
+"\n",
+"#-------#\n",
+"\n",
+"_prefixes = '{'\n",
+"for index in range(20):\n",
+"  id = f'{prefix_indices[index]}'\n",
+"  sim = prefix_sorted[index]\n",
+"  name = get_prefix(id)\n",
+"  _prefixes = _prefixes + name + '|'\n",
+"#------#\n",
+"_prefixes = (_prefixes + '}').replace('|}', '}')\n",
+"print('most similiar prefix tokens to image : ' + _prefixes)\n"
+],
+"metadata": {
+"id": "eZqMUhP0qYaK"
+},
+"execution_count": null,
+"outputs": []
+},
 {
 "cell_type": "code",
 "source": [
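Unescaped, the first new cell scores every stored text encoding against the image encoding by cosine similarity and sorts descending. Two observations: the bare `torch.matmul(...)` line discards its result, and multiplying by `logit_scale` rescales every score identically, so neither affects the ordering; also, the second cell is titled "10 most similiar" but iterates 20 indices. A condensed sketch of the ranking logic (assuming `torch` is imported earlier in the notebook and `image_features` is already L2-normalized):

    # Sketch of the ranking loop from the added cell, as a helper.
    import shelve
    import torch

    def rank_encodings(db_name, count, image_features):
        dots = torch.zeros(count)
        with shelve.open(db_name) as d:
            for index in range(count):
                text_features = d[f'{index}']
                # Ordering is driven purely by cosine similarity;
                # a constant logit_scale factor would not change it.
                dots[index] = torch.nn.functional.cosine_similarity(
                    text_features, image_features)
        return torch.sort(dots, dim=0, descending=True)

    prefix_sorted, prefix_indices = rank_encodings(PREFIX_ENC_VOCAB, NUM_PREFIX, image_features)
    suffix_sorted, suffix_indices = rank_encodings(SUFFIX_ENC_VOCAB, NUM_SUFFIX, image_features)
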
@@ -718,6 +820,35 @@
 "id": "hyK423TQCRup"
 }
 },
+{
+"cell_type": "code",
+"source": [
+"# @title Make your own text_encodings .db file for later use (rate is roughly 1K encodings per minute, so plan accordingly)\n",
+"from transformers import AutoTokenizer\n",
+"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+"from transformers import CLIPProcessor, CLIPModel\n",
+"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+"model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+"\n",
+"# Save results as .db file\n",
+"import shelve\n",
+"d = shelve.open('my_text_encodings')\n",
+"for index in range(NUM_PREFIX):\n",
+"  inputs = tokenizer(text = get_prefix(index)+'girl ', padding=True, return_tensors=\"pt\")\n",
+"  text_features = model.get_text_features(**inputs)\n",
+"  d[f'{index}'] = text_features\n",
+"#----#\n",
+"\n",
+"d.close() #close the file\n",
+"\n",
+""
+],
+"metadata": {
+"id": "9ZiTsF9jV0TV"
+},
+"execution_count": 10,
+"outputs": []
+},
 {
 "cell_type": "markdown",
 "source": [
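The encoder pass in this builder cell is the slow part (hence the "1K encodings per minute" note in the title): it runs `get_text_features` once per prefix with autograd enabled. A hedged sketch of the same loop with `torch.no_grad()` added to skip gradient bookkeeping (the `'girl '` mix-in and the `my_text_encodings` name follow the commit; the `no_grad` wrapper is an assumption):

    # Sketch of the .db builder with autograd bookkeeping disabled.
    import shelve
    import torch

    with shelve.open('my_text_encodings') as d, torch.no_grad():
        for index in range(NUM_PREFIX):
            inputs = tokenizer(text=get_prefix(index) + 'girl ',
                               padding=True, return_tensors="pt")
            d[f'{index}'] = model.get_text_features(**inputs)
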
@@ -929,7 +1060,9 @@
 "\n",
 "//---//\n",
 "\n",
-"https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images
+"https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images/\n",
+"\n",
+"https://arxiv.org/pdf/2303.03032"
 ],
 "metadata": {
 "id": "njeJx_nSSA8H"