Upload sd_token_similarity_calculator.ipynb
Browse files
sd_token_similarity_calculator.ipynb
CHANGED
|
@@ -163,6 +163,9 @@
|
|
| 163 |
"source": [
|
| 164 |
"# @title Load/initialize values (new version - ignore this cell)\n",
|
| 165 |
"#Imports\n",
|
|
|
|
|
|
|
|
|
|
| 166 |
"import json , os , shelve , torch\n",
|
| 167 |
"import pandas as pd\n",
|
| 168 |
"#----#\n",
|
|
@@ -211,7 +214,7 @@
|
|
| 211 |
" if _index <= 1 :\n",
|
| 212 |
" _file_name = f'{value}'\n",
|
| 213 |
" %cd {path_enc}\n",
|
| 214 |
-
" _text_encodings =
|
| 215 |
" #Store text_encodings for the header items\n",
|
| 216 |
" text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
|
| 217 |
" text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
|
|
@@ -229,8 +232,8 @@
|
|
| 229 |
" _text_encodings.close() #close the text_encodings file\n",
|
| 230 |
" file_index = file_index + 1\n",
|
| 231 |
" #----------#\n",
|
| 232 |
-
"
|
| 233 |
-
" return prompts , text_encodings ,
|
| 234 |
" #--------#\n",
|
| 235 |
"\n",
|
| 236 |
"#for key in prompts:\n",
|
|
@@ -240,10 +243,9 @@
|
|
| 240 |
"#------#\n"
|
| 241 |
],
|
| 242 |
"metadata": {
|
| 243 |
-
"cellView": "form",
|
| 244 |
"id": "rUXQ73IbonHY"
|
| 245 |
},
|
| 246 |
-
"execution_count":
|
| 247 |
"outputs": []
|
| 248 |
},
|
| 249 |
{
|
|
@@ -254,10 +256,9 @@
|
|
| 254 |
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
|
| 255 |
"#------#\n",
|
| 256 |
"path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
|
| 257 |
-
"prompts , text_encodings,
|
| 258 |
],
|
| 259 |
"metadata": {
|
| 260 |
-
"cellView": "form",
|
| 261 |
"id": "ZMG4CThUAmwW"
|
| 262 |
},
|
| 263 |
"execution_count": null,
|
|
@@ -1180,13 +1181,30 @@
|
|
| 1180 |
{
|
| 1181 |
"cell_type": "code",
|
| 1182 |
"source": [
|
| 1183 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1184 |
"\n",
|
| 1185 |
"import json\n",
|
| 1186 |
"import pandas as pd\n",
|
| 1187 |
"import os\n",
|
| 1188 |
"import shelve\n",
|
| 1189 |
"import torch\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1190 |
"\n",
|
| 1191 |
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
| 1192 |
"from transformers import AutoTokenizer\n",
|
|
@@ -1200,7 +1218,9 @@
|
|
| 1200 |
"my_mkdirs('/content/text_encodings/')\n",
|
| 1201 |
"filename = ''\n",
|
| 1202 |
"\n",
|
| 1203 |
-
"
|
|
|
|
|
|
|
| 1204 |
" if file_index <1: continue\n",
|
| 1205 |
" filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
|
| 1206 |
" #🦜 fusion-t2i-prompt-features-1.json\n",
|
|
@@ -1219,16 +1239,19 @@
|
|
| 1219 |
" # Calculate text_encoding for .json file contents and store results as .db file\n",
|
| 1220 |
"\n",
|
| 1221 |
" %cd /content/text_encodings/\n",
|
| 1222 |
-
"
|
| 1223 |
-
" d = shelve.open(filename)\n",
|
| 1224 |
" for index in range(NUM_ITEMS + 1):\n",
|
| 1225 |
" inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
|
| 1226 |
" text_features = model.get_text_features(**inputs).to(device)\n",
|
| 1227 |
" text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
|
| 1228 |
-
"
|
|
|
|
| 1229 |
" #----#\n",
|
| 1230 |
-
"
|
| 1231 |
-
"\n"
|
|
|
|
|
|
|
|
|
|
| 1232 |
],
|
| 1233 |
"metadata": {
|
| 1234 |
"id": "9ZiTsF9jV0TV"
|
|
|
|
| 163 |
"source": [
|
| 164 |
"# @title Load/initialize values (new version - ignore this cell)\n",
|
| 165 |
"#Imports\n",
|
| 166 |
+
"!pip install safetensors\n",
|
| 167 |
+
"from safetensors.torch import load_file\n",
|
| 168 |
+
"\n",
|
| 169 |
"import json , os , shelve , torch\n",
|
| 170 |
"import pandas as pd\n",
|
| 171 |
"#----#\n",
|
|
|
|
| 214 |
" if _index <= 1 :\n",
|
| 215 |
" _file_name = f'{value}'\n",
|
| 216 |
" %cd {path_enc}\n",
|
| 217 |
+
" _text_encodings = load_file(f'{_file_name}.safetensors')\n",
|
| 218 |
" #Store text_encodings for the header items\n",
|
| 219 |
" text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
|
| 220 |
" text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
|
|
|
|
| 232 |
" _text_encodings.close() #close the text_encodings file\n",
|
| 233 |
" file_index = file_index + 1\n",
|
| 234 |
" #----------#\n",
|
| 235 |
+
" NUM_ITEMS = index\n",
|
| 236 |
+
" return prompts , text_encodings , NUM_ITEMS\n",
|
| 237 |
" #--------#\n",
|
| 238 |
"\n",
|
| 239 |
"#for key in prompts:\n",
|
|
|
|
| 243 |
"#------#\n"
|
| 244 |
],
|
| 245 |
"metadata": {
|
|
|
|
| 246 |
"id": "rUXQ73IbonHY"
|
| 247 |
},
|
| 248 |
+
"execution_count": 3,
|
| 249 |
"outputs": []
|
| 250 |
},
|
| 251 |
{
|
|
|
|
| 256 |
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
|
| 257 |
"#------#\n",
|
| 258 |
"path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
|
| 259 |
+
"prompts , text_encodings, NUM_ITEMS = getPrompts(path)"
|
| 260 |
],
|
| 261 |
"metadata": {
|
|
|
|
| 262 |
"id": "ZMG4CThUAmwW"
|
| 263 |
},
|
| 264 |
"execution_count": null,
|
|
|
|
| 1181 |
{
|
| 1182 |
"cell_type": "code",
|
| 1183 |
"source": [
|
| 1184 |
+
"%cd /content/\n",
|
| 1185 |
+
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
|
| 1186 |
+
],
|
| 1187 |
+
"metadata": {
|
| 1188 |
+
"id": "GPAUFxZgPfrY"
|
| 1189 |
+
},
|
| 1190 |
+
"execution_count": null,
|
| 1191 |
+
"outputs": []
|
| 1192 |
+
},
|
| 1193 |
+
{
|
| 1194 |
+
"cell_type": "code",
|
| 1195 |
+
"source": [
|
| 1196 |
+
"# @title Make your own text_encodings .safetensors file for later use (using GPU is recommended to speed things up)\n",
|
| 1197 |
"\n",
|
| 1198 |
"import json\n",
|
| 1199 |
"import pandas as pd\n",
|
| 1200 |
"import os\n",
|
| 1201 |
"import shelve\n",
|
| 1202 |
"import torch\n",
|
| 1203 |
+
"from safetensors.torch import save_file\n",
|
| 1204 |
+
"\n",
|
| 1205 |
+
"def my_mkdirs(folder):\n",
|
| 1206 |
+
" if os.path.exists(folder)==False:\n",
|
| 1207 |
+
" os.makedirs(folder)\n",
|
| 1208 |
"\n",
|
| 1209 |
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
| 1210 |
"from transformers import AutoTokenizer\n",
|
|
|
|
| 1218 |
"my_mkdirs('/content/text_encodings/')\n",
|
| 1219 |
"filename = ''\n",
|
| 1220 |
"\n",
|
| 1221 |
+
"NUM_FILES = 34\n",
|
| 1222 |
+
"\n",
|
| 1223 |
+
"for file_index in range(NUM_FILES + 1):\n",
|
| 1224 |
" if file_index <1: continue\n",
|
| 1225 |
" filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
|
| 1226 |
" #🦜 fusion-t2i-prompt-features-1.json\n",
|
|
|
|
| 1239 |
" # Calculate text_encoding for .json file contents and store results as .safetensors file\n",
|
| 1240 |
"\n",
|
| 1241 |
" %cd /content/text_encodings/\n",
|
| 1242 |
+
" text_encoding_dict = {}\n",
|
|
|
|
| 1243 |
" for index in range(NUM_ITEMS + 1):\n",
|
| 1244 |
" inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
|
| 1245 |
" text_features = model.get_text_features(**inputs).to(device)\n",
|
| 1246 |
" text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
|
| 1247 |
+
" text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
|
| 1248 |
+
" save_file(text_encoding_dict, f'{filename}.safetensors')\n",
|
| 1249 |
" #----#\n",
|
| 1250 |
+
"\n",
|
| 1251 |
+
"#from safetensors.torch import load_file\n",
|
| 1252 |
+
"#%cd /content/text_encodings\n",
|
| 1253 |
+
"#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
|
| 1254 |
+
"#print(loaded[\"325\"])"
|
| 1255 |
],
|
| 1256 |
"metadata": {
|
| 1257 |
"id": "9ZiTsF9jV0TV"
|