codeShare
/

JupyterNotebooks

Model card Files Files and versions

xet

Community

codeShare committed on Sep 14, 2024

Commit

fa20c7e

verified ·

1 Parent(s): fd7fc65

Upload sd_token_similarity_calculator.ipynb

Browse files

Files changed (1) hide show

sd_token_similarity_calculator.ipynb +37 -14

sd_token_similarity_calculator.ipynb CHANGED Viewed

@@ -163,6 +163,9 @@
       "source": [
         "# @title Load/initialize values (new version - ignore this cell)\n",
         "#Imports\n",
         "import json , os , shelve , torch\n",
         "import pandas as pd\n",
         "#----#\n",
@@ -211,7 +214,7 @@
         "      if _index <= 1 :\n",
         "        _file_name = f'{value}'\n",
         "        %cd {path_enc}\n",
-        "        _text_encodings = shelve.open(_file_name)\n",
         "        #Store text_encodings for the header items\n",
         "        text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
         "        text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
@@ -229,8 +232,8 @@
         "    _text_encodings.close() #close the text_encodings file\n",
         "    file_index = file_index + 1\n",
         "  #----------#\n",
-        "  RANGE = index\n",
-        "  return prompts , text_encodings , NUM_TOKENS\n",
         "  #--------#\n",
         "\n",
         "#for key in prompts:\n",
@@ -240,10 +243,9 @@
         "#------#\n"
       ],
       "metadata": {
-        "cellView": "form",
         "id": "rUXQ73IbonHY"
       },
-      "execution_count": null,
       "outputs": []
     },
     {
@@ -254,10 +256,9 @@
         "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
         "#------#\n",
         "path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
-        "prompts , text_encodings, RANGE = getPrompts(path)"
       ],
       "metadata": {
-        "cellView": "form",
         "id": "ZMG4CThUAmwW"
       },
       "execution_count": null,
@@ -1180,13 +1181,30 @@
     {
       "cell_type": "code",
       "source": [
-        "# @title Make your own text_encodings .db file for later use (using GPU is recommended)\n",
         "\n",
         "import json\n",
         "import pandas as pd\n",
         "import os\n",
         "import shelve\n",
         "import torch\n",
         "\n",
         "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
         "from transformers import AutoTokenizer\n",
@@ -1200,7 +1218,9 @@
         "my_mkdirs('/content/text_encodings/')\n",
         "filename = ''\n",
         "\n",
-        "for  file_index in range(34 + 1):\n",
         "  if file_index <1: continue\n",
         "  filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
         "  #🦜 fusion-t2i-prompt-features-1.json\n",
@@ -1219,16 +1239,19 @@
         "  # Calculate text_encoding for .json file contents and results as .db file\n",
         "\n",
         "  %cd /content/text_encodings/\n",
-        "  import shelve\n",
-        "  d = shelve.open(filename)\n",
         "  for index in range(NUM_ITEMS + 1):\n",
         "    inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
         "    text_features = model.get_text_features(**inputs).to(device)\n",
         "    text_features =  text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
-        "    d[f'{index}'] = text_features.to('cpu')\n",
         "  #----#\n",
-        "  d.close() #close the file\n",
-        "\n"
       ],
       "metadata": {
         "id": "9ZiTsF9jV0TV"

       "source": [
         "# @title Load/initialize values (new version - ignore this cell)\n",
         "#Imports\n",
+        "!pip install safetensors\n",
+        "from safetensors.torch import load_file\n",
+        "\n",
         "import json , os , shelve , torch\n",
         "import pandas as pd\n",
         "#----#\n",
         "      if _index <= 1 :\n",
         "        _file_name = f'{value}'\n",
         "        %cd {path_enc}\n",
+        "        _text_encodings = load_file(f'{_file_name}.safetensors')\n",
         "        #Store text_encodings for the header items\n",
         "        text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
         "        text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
         "    #_text_encodings comes from safetensors load_file(), which returns a plain dict of tensors rather than an open shelve handle, so there is nothing to close here\n",
         "    file_index = file_index + 1\n",
         "  #----------#\n",
+        "  NUM_ITEMS = index\n",
+        "  return prompts , text_encodings , NUM_ITEMS\n",
         "  #--------#\n",
         "\n",
         "#for key in prompts:\n",
         "#------#\n"
       ],
       "metadata": {
         "id": "rUXQ73IbonHY"
       },
+      "execution_count": 3,
       "outputs": []
     },
     {
         "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
         "#------#\n",
         "path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
+        "prompts , text_encodings, NUM_ITEMS = getPrompts(path)"
       ],
       "metadata": {
         "id": "ZMG4CThUAmwW"
       },
       "execution_count": null,
     {
       "cell_type": "code",
       "source": [
+        "%cd /content/\n",
+        "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
+      ],
+      "metadata": {
+        "id": "GPAUFxZgPfrY"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# @title Make your own text_encodings .safetensors file for later use (using GPU is recommended to speed things up)\n",
         "\n",
         "import json\n",
         "import pandas as pd\n",
         "import os\n",
         "import shelve\n",
         "import torch\n",
+        "from safetensors.torch import save_file\n",
+        "\n",
+        "def my_mkdirs(folder):\n",
+        "  #Create `folder` together with any missing parent directories.\n",
+        "  #exist_ok=True makes the call a no-op when the directory is already there,\n",
+        "  #so no separate os.path.exists() check is needed before creating it.\n",
+        "  #Note: raises FileExistsError if `folder` exists but is not a directory.\n",
+        "  os.makedirs(folder, exist_ok=True)\n",
         "\n",
         "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
         "from transformers import AutoTokenizer\n",
         "my_mkdirs('/content/text_encodings/')\n",
         "filename = ''\n",
         "\n",
+        "NUM_FILES = 34\n",
+        "\n",
+        "for  file_index in range(NUM_FILES + 1):\n",
         "  if file_index <1: continue\n",
         "  filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
         "  #🦜 fusion-t2i-prompt-features-1.json\n",
         "  # Calculate text_encoding for the .json file contents and store the results as a .safetensors file\n",
         "\n",
         "  %cd /content/text_encodings/\n",
+        "  text_encoding_dict = {}\n",
         "  for index in range(NUM_ITEMS + 1):\n",
         "    inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
         "    text_features = model.get_text_features(**inputs).to(device)\n",
         "    text_features =  text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
+        "    text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
+        "    save_file(text_encoding_dict, f'{filename}.safetensors')\n",
         "  #----#\n",
+        "\n",
+        "#from safetensors.torch import load_file\n",
+        "#%cd /content/text_encodings\n",
+        "#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
+        "#print(loaded[\"325\"])"
       ],
       "metadata": {
         "id": "9ZiTsF9jV0TV"