Spaces:
Runtime error
Commit
·
db03f5d
1
Parent(s):
1272949
Refactor generate_embedding function to remove unneeded variables
Browse files
app.py
CHANGED
|
@@ -43,7 +43,7 @@ def generate_embedding(text_data, image_data):
|
|
| 43 |
|
| 44 |
# Embed text data
|
| 45 |
text_embeddings = []
|
| 46 |
-
|
| 47 |
if text_data:
|
| 48 |
# If text_data is a string, convert to list of strings
|
| 49 |
if isinstance(text_data, str):
|
|
@@ -54,7 +54,7 @@ def generate_embedding(text_data, image_data):
|
|
| 54 |
text_data = list(text_data)
|
| 55 |
|
| 56 |
# Keep track of indices of empty text strings
|
| 57 |
-
|
| 58 |
|
| 59 |
# Remove empty text strings
|
| 60 |
text_data = [text for text in text_data if text != ""]
|
|
@@ -71,12 +71,12 @@ def generate_embedding(text_data, image_data):
|
|
| 71 |
text_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in text_embeddings]
|
| 72 |
|
| 73 |
# Insert empty strings at indices of empty text strings
|
| 74 |
-
for i in
|
| 75 |
text_embeddings.insert(i, "")
|
| 76 |
|
| 77 |
# Embed image data
|
| 78 |
image_embeddings = []
|
| 79 |
-
|
| 80 |
if image_data:
|
| 81 |
# If image_data is a single PIL image, convert to list of PIL images
|
| 82 |
if isinstance(image_data, PIL.Image.Image):
|
|
@@ -87,7 +87,7 @@ def generate_embedding(text_data, image_data):
|
|
| 87 |
image_data = list(image_data)
|
| 88 |
|
| 89 |
# Keep track of indices of None images
|
| 90 |
-
|
| 91 |
|
| 92 |
# Remove None images
|
| 93 |
image_data = [img for img in image_data if img is not None]
|
|
@@ -105,12 +105,12 @@ def generate_embedding(text_data, image_data):
|
|
| 105 |
image_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in image_embeddings]
|
| 106 |
|
| 107 |
# Insert empty strings at indices of empty images
|
| 108 |
-
for i in
|
| 109 |
image_embeddings.insert(i, "")
|
| 110 |
|
| 111 |
# Calculate cosine similarity between text and image embeddings
|
| 112 |
similarity = []
|
| 113 |
-
|
| 114 |
if text_embeddings and image_embeddings:
|
| 115 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
| 116 |
text_embeddings_filtered = []
|
|
@@ -120,7 +120,7 @@ def generate_embedding(text_data, image_data):
|
|
| 120 |
text_embeddings_filtered.append(text_embedding)
|
| 121 |
image_embeddings_filtered.append(image_embedding)
|
| 122 |
else:
|
| 123 |
-
|
| 124 |
|
| 125 |
# Calculate cosine similarity if there are any non-empty embedding pairs
|
| 126 |
if image_embeddings_filtered and text_embeddings_filtered:
|
|
@@ -138,7 +138,7 @@ def generate_embedding(text_data, image_data):
|
|
| 138 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
| 139 |
|
| 140 |
# Insert empty text strings in similarity
|
| 141 |
-
for i in
|
| 142 |
similarity.insert(i, "")
|
| 143 |
|
| 144 |
return (text_embeddings, image_embeddings, similarity)
|
|
|
|
| 43 |
|
| 44 |
# Embed text data
|
| 45 |
text_embeddings = []
|
| 46 |
+
empty_data_indices = []
|
| 47 |
if text_data:
|
| 48 |
# If text_data is a string, convert to list of strings
|
| 49 |
if isinstance(text_data, str):
|
|
|
|
| 54 |
text_data = list(text_data)
|
| 55 |
|
| 56 |
# Keep track of indices of empty text strings
|
| 57 |
+
empty_data_indices = [i for i, text in enumerate(text_data) if text == ""]
|
| 58 |
|
| 59 |
# Remove empty text strings
|
| 60 |
text_data = [text for text in text_data if text != ""]
|
|
|
|
| 71 |
text_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in text_embeddings]
|
| 72 |
|
| 73 |
# Insert empty strings at indices of empty text strings
|
| 74 |
+
for i in empty_data_indices:
|
| 75 |
text_embeddings.insert(i, "")
|
| 76 |
|
| 77 |
# Embed image data
|
| 78 |
image_embeddings = []
|
| 79 |
+
empty_data_indices = []
|
| 80 |
if image_data:
|
| 81 |
# If image_data is a single PIL image, convert to list of PIL images
|
| 82 |
if isinstance(image_data, PIL.Image.Image):
|
|
|
|
| 87 |
image_data = list(image_data)
|
| 88 |
|
| 89 |
# Keep track of indices of None images
|
| 90 |
+
empty_data_indices = [i for i, img in enumerate(image_data) if img is None]
|
| 91 |
|
| 92 |
# Remove None images
|
| 93 |
image_data = [img for img in image_data if img is not None]
|
|
|
|
| 105 |
image_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in image_embeddings]
|
| 106 |
|
| 107 |
# Insert empty strings at indices of empty images
|
| 108 |
+
for i in empty_data_indices:
|
| 109 |
image_embeddings.insert(i, "")
|
| 110 |
|
| 111 |
# Calculate cosine similarity between text and image embeddings
|
| 112 |
similarity = []
|
| 113 |
+
empty_data_indices = []
|
| 114 |
if text_embeddings and image_embeddings:
|
| 115 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
| 116 |
text_embeddings_filtered = []
|
|
|
|
| 120 |
text_embeddings_filtered.append(text_embedding)
|
| 121 |
image_embeddings_filtered.append(image_embedding)
|
| 122 |
else:
|
| 123 |
+
empty_data_indices.append(i)
|
| 124 |
|
| 125 |
# Calculate cosine similarity if there are any non-empty embedding pairs
|
| 126 |
if image_embeddings_filtered and text_embeddings_filtered:
|
|
|
|
| 138 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
| 139 |
|
| 140 |
# Insert empty text strings in similarity
|
| 141 |
+
for i in empty_data_indices:
|
| 142 |
similarity.insert(i, "")
|
| 143 |
|
| 144 |
return (text_embeddings, image_embeddings, similarity)
|