codeShare
/

JupyterNotebooks

Model card Files Files and versions

xet

Community

codeShare committed on Sep 10, 2024

Commit

73c73f4

verified ·

1 Parent(s): f378257

Upload sd_token_similarity_calculator.ipynb

Browse files

Files changed (1) hide show

sd_token_similarity_calculator.ipynb +25 -24

sd_token_similarity_calculator.ipynb CHANGED Viewed

@@ -117,7 +117,7 @@
         "id": "Ch9puvwKH1s3",
         "collapsed": true,
         "cellView": "form",
-        "outputId": "aa58503f-8e68-43bf-d73b-3eb877ae10e4",
         "colab": {
           "base_uri": "https://localhost:8080/"
         }
@@ -133,7 +133,7 @@
             "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
             "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
             "remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
-            "Unpacking objects: 100% (10/10), 306.93 KiB | 5.48 MiB/s, done.\n",
             "/content/sd_tokens\n"
           ]
         }
@@ -345,9 +345,7 @@
         "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
         "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
         "image_features = model.get_image_features(**inputs)\n",
-        "text_encoding_A = image_features\n",
-        "A =  text_encoding_A[0]\n",
-        "_A = LA.vector_norm(A, ord=2)\n",
         "prompt_A = \"the image\"\n",
         "name_A = prompt_A\n",
         "#-----#\n",
@@ -390,7 +388,6 @@
         "  C = token[id_C]\n",
         "  _C = LA.vector_norm(C, ord=2)\n",
         "  name_C = vocab[id_C]\n",
-        "\n",
         "  is_Prefix = 0\n",
         "\n",
         "\n",
@@ -421,10 +418,11 @@
         "    name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
         "  #-----#\n",
         "  ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
-        "  text_encoding_CB = model.get_text_features(**ids_CB)\n",
-        "  CB = text_encoding_CB[0]\n",
-        "  _CB =  LA.vector_norm(CB, ord=2)\n",
-        "  sim_CB = torch.dot(A,CB)/(_A*_CB)\n",
         "  #-----#\n",
         "  if restrictions == \"Prefix only\":\n",
         "    result = sim_CB\n",
@@ -434,10 +432,11 @@
         "  #-----#\n",
         "  name_BC = must_start_with + name_B + name_C + must_end_with\n",
         "  ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
-        "  text_encoding_BC = model.get_text_features(**ids_BC)\n",
-        "  BC = text_encoding_BC[0]\n",
-        "  _BC =  LA.vector_norm(BC, ord=2)\n",
-        "  sim_BC = torch.dot(A,BC)/(_A*_BC)\n",
         "  #-----#\n",
         "\n",
         "  result = sim_CB\n",
@@ -504,8 +503,8 @@
         "#------#\n",
         "trails = (trails + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
         "aheads = (aheads + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
-        "max_sim_ahead=max_sim_ahead*100\n",
-        "max_sim_ahead=max_sim_trail*100\n",
         "#-----#\n",
         "print(f\"place these items ahead of prompt :  {aheads}\")\n",
         "print(\"\")\n",
@@ -530,11 +529,14 @@
         "  if index == 3 : name = name + max_name_ahead + must_contain + max_name_trail\n",
         "  name = name + must_end_with\n",
         "  #----#\n",
-        "  ids_B = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
-        "  text_encoding_B = model.get_text_features(**ids_B)\n",
-        "  B = text_encoding_B[0]\n",
-        "  _B =  LA.vector_norm(B, ord=2)\n",
-        "  dots[index] = torch.dot(A,B)/(_A*_B)\n",
         "  names[index] = name\n",
         "#------#\n",
         "\n",
@@ -542,12 +544,11 @@
         "\n",
         "for index in range(NUM_PERMUTATIONS):\n",
         "  print(names[indices[index].item()])\n",
-        "  print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
         "  print('------')\n",
         "\n",
         "\n",
-        "\n",
-        ""
       ],
       "metadata": {
         "collapsed": true,

         "id": "Ch9puvwKH1s3",
         "collapsed": true,
         "cellView": "form",
+        "outputId": "8101e515-49f2-41d4-b03b-4195d56f50de",
         "colab": {
           "base_uri": "https://localhost:8080/"
         }
             "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
             "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
             "remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+            "Unpacking objects: 100% (10/10), 306.93 KiB | 1.19 MiB/s, done.\n",
             "/content/sd_tokens\n"
           ]
         }
         "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
         "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
         "image_features = model.get_image_features(**inputs)\n",
+        "image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
         "prompt_A = \"the image\"\n",
         "name_A = prompt_A\n",
         "#-----#\n",
         "  C = token[id_C]\n",
         "  _C = LA.vector_norm(C, ord=2)\n",
         "  name_C = vocab[id_C]\n",
         "  is_Prefix = 0\n",
         "\n",
         "\n",
         "    name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
         "  #-----#\n",
         "  ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "  text_features = model.get_text_features(**ids_CB)\n",
+        "  text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+        "  logit_scale = model.logit_scale.exp()\n",
+        "  torch.matmul(text_features, image_features.t()) * logit_scale\n",
+        "  sim_CB = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
         "  #-----#\n",
         "  if restrictions == \"Prefix only\":\n",
         "    result = sim_CB\n",
         "  #-----#\n",
         "  name_BC = must_start_with + name_B + name_C + must_end_with\n",
         "  ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "  text_features = model.get_text_features(**ids_BC)\n",
+        "  text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+        "  logit_scale = model.logit_scale.exp()\n",
+        "  torch.matmul(text_features, image_features.t()) * logit_scale\n",
+        "  sim_BC = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
         "  #-----#\n",
         "\n",
         "  result = sim_CB\n",
         "#------#\n",
         "trails = (trails + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
         "aheads = (aheads + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
+        "max_sim_ahead=max_sim_ahead\n",
+        "max_sim_ahead=max_sim_trail\n",
         "#-----#\n",
         "print(f\"place these items ahead of prompt :  {aheads}\")\n",
         "print(\"\")\n",
         "  if index == 3 : name = name + max_name_ahead + must_contain + max_name_trail\n",
         "  name = name + must_end_with\n",
         "  #----#\n",
+        "  ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "\n",
+        "  text_features = model.get_text_features(**ids)\n",
+        "  text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+        "  logit_scale = model.logit_scale.exp()\n",
+        "  torch.matmul(text_features, image_features.t()) * logit_scale\n",
+        "  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+        "  dots[index] = sim\n",
         "  names[index] = name\n",
         "#------#\n",
         "\n",
         "\n",
         "for index in range(NUM_PERMUTATIONS):\n",
         "  print(names[indices[index].item()])\n",
+        "  print(f'similiarity = {round(sorted[index].item(),2)} %')\n",
         "  print('------')\n",
         "\n",
         "\n",
+        "\n"
       ],
       "metadata": {
         "collapsed": true,