codeShare
/

JupyterNotebooks

Model card Files Files and versions

xet

Community

codeShare committed on Sep 12, 2024

Commit

b293fe3

verified ·

1 Parent(s): 213eb4a

Upload sd_token_similarity_calculator.ipynb

Browse files

Files changed (1) hide show

sd_token_similarity_calculator.ipynb +194 -153

sd_token_similarity_calculator.ipynb CHANGED Viewed

@@ -160,6 +160,114 @@
       "execution_count": null,
       "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
@@ -313,119 +421,6 @@
       "execution_count": null,
       "outputs": []
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "# @title 📝 Prompt similarity:  Order pre-made text_encodings\n",
-        "prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
-        "from transformers import AutoTokenizer\n",
-        "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
-        "from transformers import  CLIPProcessor, CLIPModel\n",
-        "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
-        "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
-        "\n",
-        "# Get text features for user input\n",
-        "inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
-        "text_features_A = model.get_text_features(**inputs)\n",
-        "text_features_A = text_features_A/text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
-        "name_A = prompt\n",
-        "#------#\n",
-        "\n",
-        "# Load the .db file for prefix encodings\n",
-        "import shelve\n",
-        "_iters = -1\n",
-        "RANGE = NUM_PREFIX\n",
-        "NUM_PREFIX_LISTS = 1\n",
-        "dots = results_sim = torch.zeros(RANGE*NUM_PREFIX_LISTS)\n",
-        "for _PREFIX_ENC_VOCAB in PREFIX_ENC_VOCAB:\n",
-        "  _iters = _iters + 1\n",
-        "  d = shelve.open(_PREFIX_ENC_VOCAB)\n",
-        "  for _index in range(RANGE):\n",
-        "    index = _iters*RANGE + _index\n",
-        "    text_features = d[f'{_index}']\n",
-        "    text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
-        "    sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
-        "    dots[index] = sim\n",
-        "  #----#\n",
-        "  d.close() #close the file\n",
-        "#------#\n",
-        "prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
-        "#------#\n",
-        "\n",
-        "# Load the .db file for prefix encodings\n",
-        "import shelve\n",
-        "_iters = -1\n",
-        "RANGE = NUM_SUFFIX\n",
-        "dots = results_sim = torch.zeros(RANGE*NUM_SUFFIX_LISTS)\n",
-        "for _SUFFIX_ENC_VOCAB in SUFFIX_ENC_VOCAB:\n",
-        "  _iters = _iters + 1\n",
-        "  d = shelve.open(_SUFFIX_ENC_VOCAB)\n",
-        "  for _index in range(RANGE):\n",
-        "    index = _iters*RANGE + _index\n",
-        "    text_features = d[f'{_index}']\n",
-        "    text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
-        "    sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
-        "    dots[index] = sim\n",
-        "  #----#\n",
-        "  d.close() #close the file\n",
-        "#------#\n",
-        "suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
-        "#------#\n",
-        "\n",
-        "#Print the results\n",
-        "#'from_-encoded_suffix',\n",
-        "#'a_-_encoded_suffix' ,\n",
-        "#'by_-encoded_suffix' ,\n",
-        "#'encoded_suffix-_like'\n",
-        "\n",
-        "# title Show the 100 most similiar suffix and prefix text-encodings to the text encoding\n",
-        "RANGE = 100\n",
-        "_suffixes = '{'\n",
-        "_sims =  '{'\n",
-        "for index in range(RANGE):\n",
-        "  id = int(suffix_indices[index])\n",
-        "  ahead = \"from \"\n",
-        "  behind = \"\"\n",
-        "  if(id>NUM_SUFFIX*1):\n",
-        "    ahead = \"a \"\n",
-        "  if(id>NUM_SUFFIX*2):\n",
-        "    ahead = \"by \"\n",
-        "  if(id>NUM_SUFFIX*3):\n",
-        "    ahead = \"\"\n",
-        "    behind = \"like\"\n",
-        "  id = _modulus(id,NUM_SUFFIX)\n",
-        "  #------#\n",
-        "  sim = suffix_sorted[index].item()\n",
-        "  name = ahead + get_suffix(id) + behind\n",
-        "  if(get_suffix(id) == ' '): name = ahead + f'{id}' + behind\n",
-        "  _suffixes = _suffixes + name + '|'\n",
-        "  _sims = _sims + f'{round(sim*100,2)} %' + '|'\n",
-        "#------#\n",
-        "_suffixes = (_suffixes + '}').replace('|}', '}')\n",
-        "_sims = (_sims + '}').replace('|}', '}')\n",
-        "\n",
-        "print('most similiar suffix items to prompt : ' + _suffixes)\n",
-        "print('similarity % for suffix items : ' + _sims)\n",
-        "print('')\n",
-        "\n",
-        "#-------#\n",
-        "\n",
-        "_prefixes = '{'\n",
-        "for index in range(RANGE):\n",
-        "  id = f'{prefix_indices[index]}'\n",
-        "  #sim = prefix_sorted[index]\n",
-        "  name = get_prefix(id)\n",
-        "  _prefixes = _prefixes + name + '|'\n",
-        "#------#\n",
-        "_prefixes = (_prefixes + '}').replace('|}', '}')\n",
-        "print('most similiar prefix suffix to image : ' + _prefixes)\n"
-      ],
-      "metadata": {
-        "id": "xc-PbIYF428y"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
     {
       "cell_type": "markdown",
       "source": [
@@ -479,13 +474,13 @@
       ],
       "metadata": {
         "id": "ke6mZ1RZDOeB",
-        "outputId": "8f8c9d3f-cbda-4d9a-d126-c7f9311a74ee",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 1000
         }
       },
-      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -502,7 +497,7 @@
     {
       "cell_type": "code",
       "source": [
-        "# @title 🖼️ Image similarity : Order pre-made text_encodings\n",
         "from transformers import AutoTokenizer\n",
         "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
         "from transformers import  CLIPProcessor, CLIPModel\n",
@@ -514,37 +509,96 @@
         "image_features = model.get_image_features(**inputs)\n",
         "image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
         "name_A = \"the image\"\n",
         "\n",
         "# Load the .db file for prefix encodings\n",
         "import shelve\n",
-        "d = shelve.open(PREFIX_ENC_VOCAB)\n",
-        "dots = results_sim = torch.zeros(NUM_PREFIX)\n",
-        "for index in range(NUM_PREFIX):\n",
-        "  text_features = d[f'{index}']\n",
-        "  logit_scale = model.logit_scale.exp()\n",
-        "  torch.matmul(text_features, image_features.t()) * logit_scale\n",
-        "  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
-        "  dots[index] = sim\n",
-        "#----#\n",
         "prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
-        "d.close() #close the file\n",
         "\n",
-        "# Load the .db file for suffix encodings\n",
         "import shelve\n",
-        "d = shelve.open(SUFFIX_ENC_VOCAB)\n",
-        "dots = results_sim = torch.zeros(NUM_SUFFIX)\n",
-        "for index in range(NUM_SUFFIX):\n",
-        "  text_features = d[f'{index}']\n",
-        "  logit_scale = model.logit_scale.exp()\n",
-        "  torch.matmul(text_features, image_features.t()) * logit_scale\n",
-        "  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
-        "  dots[index] = sim\n",
-        "#----#\n",
         "suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
-        "d.close() #close the file"
       ],
       "metadata": {
-        "id": "gaOB8rsOneIa"
       },
       "execution_count": null,
       "outputs": []
@@ -577,23 +631,10 @@
         "print('most similiar prefix tokens to image : ' + _prefixes)\n"
       ],
       "metadata": {
-        "id": "eZqMUhP0qYaK",
-        "outputId": "4801cded-e73c-4c0b-eb6e-608ed899ff49",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        }
       },
       "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "most similiar suffix tokens to image : {vfx |cleanup |warcraft |defend |avatar |wall |blu |indigo |dfs |bluetooth |orian |alliance |defence |defenses |defense |guardians |descendants |navis |raid |avengersendgame }\n",
-            "most similiar prefix tokens to image : {imperi-|blue-|bluec-|war-|blau-|veer-|blu-|vau-|bloo-|taun-|kavan-|kair-|storm-|anarch-|purple-|honor-|spartan-|swar-|raun-|andor-}\n"
-          ]
-        }
-      ]
     },
     {
       "cell_type": "code",

       "execution_count": null,
       "outputs": []
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "# @title 📝 Prompt similarity:  Order pre-made text_encodings\n",
+        "prompt = \" a fast car on the road \" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+        "from transformers import AutoTokenizer\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+        "from transformers import  CLIPProcessor, CLIPModel\n",
+        "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+        "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+        "\n",
+        "# Get text features for user input\n",
+        "inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
+        "text_features_A = model.get_text_features(**inputs)\n",
+        "text_features_A = text_features_A/text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
+        "name_A = prompt\n",
+        "#------#\n",
+        "\n",
+        "# Load the .db files for prefix encodings and score each entry against the prompt.\n",
+        "# For every file the loop reads the keys '0' .. str(NUM_PREFIX-1).\n",
+        "import shelve\n",
+        "_iters = -1\n",
+        "RANGE = NUM_PREFIX\n",
+        "# Size the score buffer from the files actually iterated below; a hard-coded 1\n",
+        "# overflows `dots` as soon as PREFIX_ENC_VOCAB holds more than one file.\n",
+        "NUM_PREFIX_LISTS = len(PREFIX_ENC_VOCAB)\n",
+        "dots = results_sim = torch.zeros(RANGE*NUM_PREFIX_LISTS)\n",
+        "for _PREFIX_ENC_VOCAB in PREFIX_ENC_VOCAB:\n",
+        "  _iters = _iters + 1\n",
+        "  # `with` closes the shelf even when a lookup or tensor op raises\n",
+        "  with shelve.open(_PREFIX_ENC_VOCAB) as d:\n",
+        "    for _index in range(RANGE):\n",
+        "      index = _iters*RANGE + _index  # flat slot: file number * RANGE + offset\n",
+        "      text_features = d[f'{_index}']\n",
+        "      text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
+        "      sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+        "      dots[index] = sim\n",
+        "  #----#\n",
+        "#------#\n",
+        "prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+        "#------#\n",
+        "\n",
+        "# Load the .db files for suffix encodings and score them the same way.\n",
+        "# NUM_SUFFIX_LISTS is defined outside this cell -- presumably len(SUFFIX_ENC_VOCAB); TODO confirm.\n",
+        "_iters = -1\n",
+        "RANGE = NUM_SUFFIX\n",
+        "dots = results_sim = torch.zeros(RANGE*NUM_SUFFIX_LISTS)\n",
+        "for _SUFFIX_ENC_VOCAB in SUFFIX_ENC_VOCAB:\n",
+        "  _iters = _iters + 1\n",
+        "  with shelve.open(_SUFFIX_ENC_VOCAB) as d:\n",
+        "    for _index in range(RANGE):\n",
+        "      index = _iters*RANGE + _index  # flat slot: file number * RANGE + offset\n",
+        "      text_features = d[f'{_index}']\n",
+        "      text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
+        "      sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+        "      dots[index] = sim\n",
+        "  #----#\n",
+        "#------#\n",
+        "suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+        "#------#\n",
+        "\n",
+        "#Print the results\n",
+        "# Show the RANGE most similar suffix and prefix text-encodings to the prompt encoding\n",
+        "RANGE = 30\n",
+        "_suffixes = '{'\n",
+        "_sims =  '{'\n",
+        "for index in range(RANGE):\n",
+        "  id = int(suffix_indices[index])\n",
+        "  # Flat ids were filled as list_number*NUM_SUFFIX + offset, so the first entry of\n",
+        "  # every list sits exactly on a multiple of NUM_SUFFIX: the boundaries must use >=.\n",
+        "  ahead = \"from \"\n",
+        "  behind = \"\"\n",
+        "  if(id>=NUM_SUFFIX*1):\n",
+        "    ahead = \"a \"\n",
+        "  if(id>=NUM_SUFFIX*2):\n",
+        "    ahead = \"by \"\n",
+        "  if(id>=NUM_SUFFIX*3):\n",
+        "    ahead = \"\"\n",
+        "    behind = \"like\"\n",
+        "  # Reduce the flat id to an offset inside its list before looking up the text\n",
+        "  id = _modulus(id,NUM_SUFFIX)\n",
+        "  #------#\n",
+        "  sim = suffix_sorted[index].item()\n",
+        "  name = ahead + get_suffix(id) + behind\n",
+        "  # Fall back to the numeric id when the lookup returns a single space\n",
+        "  if(get_suffix(id) == ' '): name = ahead + f'{id}' + behind\n",
+        "  _suffixes = _suffixes + name + '|'\n",
+        "  _sims = _sims + f'{round(sim*100,2)} %' + '|'\n",
+        "#------#\n",
+        "# Swap the trailing '|' for the closing brace\n",
+        "_suffixes = (_suffixes + '}').replace('|}', '}')\n",
+        "_sims = (_sims + '}').replace('|}', '}')\n",
+        "\n",
+        "print('most similiar suffix items to prompt : ' + _suffixes)\n",
+        "print('similarity % for suffix items : ' + _sims)\n",
+        "print('')\n",
+        "\n",
+        "#-------#\n",
+        "\n",
+        "_prefixes = '{'\n",
+        "for index in range(RANGE):\n",
+        "  # get_prefix is called with the index rendered as a string\n",
+        "  id = f'{prefix_indices[index]}'\n",
+        "  #sim = prefix_sorted[index]\n",
+        "  name = get_prefix(id)\n",
+        "  _prefixes = _prefixes + name + '|'\n",
+        "#------#\n",
+        "_prefixes = (_prefixes + '}').replace('|}', '}')\n",
+        "# This cell compares against the text prompt, not an image\n",
+        "print('most similiar prefix items to prompt : ' + _prefixes)\n"
+      ],
+      "metadata": {
+        "id": "xc-PbIYF428y"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "source": [
       "execution_count": null,
       "outputs": []
     },
     {
       "cell_type": "markdown",
       "source": [
       ],
       "metadata": {
         "id": "ke6mZ1RZDOeB",
+        "outputId": "f98f9ea5-32d1-4cf7-b523-1c6b6e6792a2",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 1000
         }
       },
+      "execution_count": 2,
       "outputs": [
         {
           "output_type": "display_data",
     {
       "cell_type": "code",
       "source": [
+        "\n",
         "from transformers import AutoTokenizer\n",
         "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
         "from transformers import  CLIPProcessor, CLIPModel\n",
         "image_features = model.get_image_features(**inputs)\n",
         "image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
         "name_A = \"the image\"\n",
+        "#-----#\n",
         "\n",
         "# Load the .db file for prefix encodings\n",
         "import shelve\n",
+        "_iters = -1\n",
+        "RANGE = NUM_PREFIX\n",
+        "# Size the score buffer from the files actually iterated below; a hard-coded 1\n",
+        "# overflows `dots` as soon as PREFIX_ENC_VOCAB holds more than one file.\n",
+        "NUM_PREFIX_LISTS = len(PREFIX_ENC_VOCAB)\n",
+        "dots = results_sim = torch.zeros(RANGE*NUM_PREFIX_LISTS)\n",
+        "# Loop-invariant: read the model's logit scale once instead of once per entry\n",
+        "logit_scale = model.logit_scale.exp()\n",
+        "for _PREFIX_ENC_VOCAB in PREFIX_ENC_VOCAB:\n",
+        "  _iters = _iters + 1\n",
+        "  # `with` closes the shelf even when a lookup or tensor op raises\n",
+        "  with shelve.open(_PREFIX_ENC_VOCAB) as d:\n",
+        "    for _index in range(RANGE):\n",
+        "      index = _iters*RANGE + _index  # flat slot: file number * RANGE + offset\n",
+        "      text_features = d[f'{_index}']\n",
+        "      # Stored score is cosine similarity multiplied by logit_scale\n",
+        "      sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+        "      dots[index] = sim\n",
+        "  #----#\n",
+        "#------#\n",
         "prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+        "#------#\n",
         "\n",
+        "# Load the .db files for suffix encodings and score them the same way.\n",
+        "# NUM_SUFFIX_LISTS is defined outside this cell -- presumably len(SUFFIX_ENC_VOCAB); TODO confirm.\n",
         "import shelve\n",
+        "_iters = -1\n",
+        "RANGE = NUM_SUFFIX\n",
+        "dots = results_sim = torch.zeros(RANGE*NUM_SUFFIX_LISTS)\n",
+        "for _SUFFIX_ENC_VOCAB in SUFFIX_ENC_VOCAB:\n",
+        "  _iters = _iters + 1\n",
+        "  with shelve.open(_SUFFIX_ENC_VOCAB) as d:\n",
+        "    for _index in range(RANGE):\n",
+        "      index = _iters*RANGE + _index  # flat slot: file number * RANGE + offset\n",
+        "      text_features = d[f'{_index}']\n",
+        "      sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+        "      dots[index] = sim\n",
+        "  #----#\n",
+        "#------#\n",
         "suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+        "#------#\n",
+        "\n",
+        "#Print the results\n",
+        "# Show the RANGE most similar suffix and prefix text-encodings to the image encoding\n",
+        "RANGE = 30\n",
+        "_suffixes = '{'\n",
+        "_sims =  '{'\n",
+        "for index in range(RANGE):\n",
+        "  id = int(suffix_indices[index])\n",
+        "  # Flat ids were filled as list_number*NUM_SUFFIX + offset, so the first entry of\n",
+        "  # every list sits exactly on a multiple of NUM_SUFFIX: the boundaries must use >=.\n",
+        "  ahead = \"from \"\n",
+        "  behind = \"\"\n",
+        "  if(id>=NUM_SUFFIX*1):\n",
+        "    ahead = \"a \"\n",
+        "  if(id>=NUM_SUFFIX*2):\n",
+        "    ahead = \"by \"\n",
+        "  if(id>=NUM_SUFFIX*3):\n",
+        "    ahead = \"\"\n",
+        "    behind = \"like\"\n",
+        "  # Reduce the flat id to an offset inside its list before looking up the text\n",
+        "  id = _modulus(id,NUM_SUFFIX)\n",
+        "  #------#\n",
+        "  # The stored scores are cosine similarity multiplied by the model's logit scale;\n",
+        "  # divide it back out so the printed percentage is the plain cosine similarity.\n",
+        "  sim = suffix_sorted[index].item() / model.logit_scale.exp().item()\n",
+        "  name = ahead + get_suffix(id) + behind\n",
+        "  # Fall back to the numeric id when the lookup returns a single space\n",
+        "  if(get_suffix(id) == ' '): name = ahead + f'{id}' + behind\n",
+        "  _suffixes = _suffixes + name + '|'\n",
+        "  _sims = _sims + f'{round(sim*100,2)} %' + '|'\n",
+        "#------#\n",
+        "# Swap the trailing '|' for the closing brace\n",
+        "_suffixes = (_suffixes + '}').replace('|}', '}')\n",
+        "_sims = (_sims + '}').replace('|}', '}')\n",
+        "\n",
+        "# This cell compares against the image, not a text prompt\n",
+        "print('most similiar suffix items to image : ' + _suffixes)\n",
+        "print('similarity % for suffix items : ' + _sims)\n",
+        "print('')\n",
+        "\n",
+        "#-------#\n",
+        "\n",
+        "_prefixes = '{'\n",
+        "for index in range(RANGE):\n",
+        "  # get_prefix is called with the index rendered as a string\n",
+        "  id = f'{prefix_indices[index]}'\n",
+        "  #sim = prefix_sorted[index]\n",
+        "  name = get_prefix(id)\n",
+        "  _prefixes = _prefixes + name + '|'\n",
+        "#------#\n",
+        "_prefixes = (_prefixes + '}').replace('|}', '}')\n",
+        "print('most similiar prefix items to image : ' + _prefixes)\n"
       ],
       "metadata": {
+        "id": "rebogpoyOG8k"
       },
       "execution_count": null,
       "outputs": []
         "print('most similiar prefix tokens to image : ' + _prefixes)\n"
       ],
       "metadata": {
+        "id": "eZqMUhP0qYaK"
       },
       "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",