Spaces:
Runtime error
Runtime error
Commit
·
2d8aec0
1
Parent(s):
df34abb
chore: reformat code
Browse files
app.py
CHANGED
|
@@ -6,16 +6,15 @@ import torch
|
|
| 6 |
import PIL.Image as Image
|
| 7 |
|
| 8 |
# Set device to GPU if available
|
| 9 |
-
device = torch.device(
|
| 10 |
|
| 11 |
# Load the OpenCLIP model and the necessary preprocessors
|
| 12 |
# openclip_model = 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
|
| 13 |
# openclip_model = 'laion/CLIP-ViT-B-16-laion2B-s34B-b88K'
|
| 14 |
-
openclip_model_name =
|
| 15 |
openclip_model = "hf-hub:" + openclip_model_name
|
| 16 |
model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
|
| 17 |
-
model_name=openclip_model,
|
| 18 |
-
device=device
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
@@ -66,7 +65,10 @@ def generate_text_embedding(text_data: Union[str, tuple[str]]) -> list[str]:
|
|
| 66 |
text_embeddings = model.encode_text(text_data)
|
| 67 |
|
| 68 |
# Convert embeddings to list of strings
|
| 69 |
-
text_embeddings = [
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Insert empty strings at indices of empty text strings
|
| 72 |
for i in empty_data_indices:
|
|
@@ -74,8 +76,11 @@ def generate_text_embedding(text_data: Union[str, tuple[str]]) -> list[str]:
|
|
| 74 |
|
| 75 |
return text_embeddings
|
| 76 |
|
|
|
|
| 77 |
# Define function to generate image embeddings
|
| 78 |
-
def generate_image_embedding(
|
|
|
|
|
|
|
| 79 |
"""
|
| 80 |
Generate embeddings for image data using the OpenCLIP model.
|
| 81 |
|
|
@@ -118,7 +123,10 @@ def generate_image_embedding(image_data: Union[Image.Image, tuple[Image.Image]])
|
|
| 118 |
image_embeddings = model.encode_image(image_data)
|
| 119 |
|
| 120 |
# Convert embeddings to list of strings
|
| 121 |
-
image_embeddings = [
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
# Insert empty strings at indices of empty images
|
| 124 |
for i in empty_data_indices:
|
|
@@ -128,7 +136,10 @@ def generate_image_embedding(image_data: Union[Image.Image, tuple[Image.Image]])
|
|
| 128 |
|
| 129 |
|
| 130 |
# Define function to generate embeddings
|
| 131 |
-
def generate_embedding(
|
|
|
|
|
|
|
|
|
|
| 132 |
"""
|
| 133 |
Generate embeddings for text and image data using the OpenCLIP model.
|
| 134 |
|
|
@@ -162,7 +173,9 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
|
|
| 162 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
| 163 |
text_embeddings_filtered = []
|
| 164 |
image_embeddings_filtered = []
|
| 165 |
-
for i, (text_embedding, image_embedding) in enumerate(
|
|
|
|
|
|
|
| 166 |
if text_embedding != "" and image_embedding != "":
|
| 167 |
text_embeddings_filtered.append(text_embedding)
|
| 168 |
image_embeddings_filtered.append(image_embedding)
|
|
@@ -176,11 +189,18 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
|
|
| 176 |
image_embeddings_tensor = torch.tensor(image_embeddings_filtered)
|
| 177 |
|
| 178 |
# Normalize the embeddings
|
| 179 |
-
text_embedding_norm = text_embeddings_tensor / text_embeddings_tensor.norm(
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
# Calculate cosine similarity
|
| 183 |
-
similarity = torch.nn.functional.cosine_similarity(
|
|
|
|
|
|
|
| 184 |
# Convert to percentage as text
|
| 185 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
| 186 |
|
|
@@ -195,7 +215,12 @@ def generate_embedding(text_data: Union[str, tuple[str]], image_data: Union[Imag
|
|
| 195 |
demo = gr.Interface(
|
| 196 |
fn=generate_embedding,
|
| 197 |
inputs=[
|
| 198 |
-
gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
gr.Image(height=512, type="pil", label="Image to Embed"),
|
| 200 |
],
|
| 201 |
outputs=[
|
|
@@ -208,7 +233,7 @@ demo = gr.Interface(
|
|
| 208 |
description="Generate embeddings using OpenCLIP model for text and images.",
|
| 209 |
allow_flagging="never",
|
| 210 |
batch=False,
|
| 211 |
-
api_name="embed"
|
| 212 |
)
|
| 213 |
|
| 214 |
# Enable queueing and launch the app
|
|
|
|
| 6 |
import PIL.Image as Image
|
| 7 |
|
| 8 |
# Set device to GPU if available
|
| 9 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 10 |
|
| 11 |
# Load the OpenCLIP model and the necessary preprocessors
|
| 12 |
# openclip_model = 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
|
| 13 |
# openclip_model = 'laion/CLIP-ViT-B-16-laion2B-s34B-b88K'
|
| 14 |
+
openclip_model_name = "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
|
| 15 |
openclip_model = "hf-hub:" + openclip_model_name
|
| 16 |
model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
|
| 17 |
+
model_name=openclip_model, device=device
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
|
|
|
|
| 65 |
text_embeddings = model.encode_text(text_data)
|
| 66 |
|
| 67 |
# Convert embeddings to list of strings
|
| 68 |
+
text_embeddings = [
|
| 69 |
+
embedding.detach().cpu().numpy().tolist()
|
| 70 |
+
for embedding in text_embeddings
|
| 71 |
+
]
|
| 72 |
|
| 73 |
# Insert empty strings at indices of empty text strings
|
| 74 |
for i in empty_data_indices:
|
|
|
|
| 76 |
|
| 77 |
return text_embeddings
|
| 78 |
|
| 79 |
+
|
| 80 |
# Define function to generate image embeddings
|
| 81 |
+
def generate_image_embedding(
|
| 82 |
+
image_data: Union[Image.Image, tuple[Image.Image]]
|
| 83 |
+
) -> list[str]:
|
| 84 |
"""
|
| 85 |
Generate embeddings for image data using the OpenCLIP model.
|
| 86 |
|
|
|
|
| 123 |
image_embeddings = model.encode_image(image_data)
|
| 124 |
|
| 125 |
# Convert embeddings to list of strings
|
| 126 |
+
image_embeddings = [
|
| 127 |
+
embedding.detach().cpu().numpy().tolist()
|
| 128 |
+
for embedding in image_embeddings
|
| 129 |
+
]
|
| 130 |
|
| 131 |
# Insert empty strings at indices of empty images
|
| 132 |
for i in empty_data_indices:
|
|
|
|
| 136 |
|
| 137 |
|
| 138 |
# Define function to generate embeddings
|
| 139 |
+
def generate_embedding(
|
| 140 |
+
text_data: Union[str, tuple[str]],
|
| 141 |
+
image_data: Union[Image.Image, tuple[Image.Image]],
|
| 142 |
+
) -> tuple[list[str], list[str], list[str]]:
|
| 143 |
"""
|
| 144 |
Generate embeddings for text and image data using the OpenCLIP model.
|
| 145 |
|
|
|
|
| 173 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
| 174 |
text_embeddings_filtered = []
|
| 175 |
image_embeddings_filtered = []
|
| 176 |
+
for i, (text_embedding, image_embedding) in enumerate(
|
| 177 |
+
zip(text_embeddings, image_embeddings)
|
| 178 |
+
):
|
| 179 |
if text_embedding != "" and image_embedding != "":
|
| 180 |
text_embeddings_filtered.append(text_embedding)
|
| 181 |
image_embeddings_filtered.append(image_embedding)
|
|
|
|
| 189 |
image_embeddings_tensor = torch.tensor(image_embeddings_filtered)
|
| 190 |
|
| 191 |
# Normalize the embeddings
|
| 192 |
+
text_embedding_norm = text_embeddings_tensor / text_embeddings_tensor.norm(
|
| 193 |
+
dim=-1, keepdim=True
|
| 194 |
+
)
|
| 195 |
+
image_embedding_norm = (
|
| 196 |
+
image_embeddings_tensor
|
| 197 |
+
/ image_embeddings_tensor.norm(dim=-1, keepdim=True)
|
| 198 |
+
)
|
| 199 |
|
| 200 |
# Calculate cosine similarity
|
| 201 |
+
similarity = torch.nn.functional.cosine_similarity(
|
| 202 |
+
text_embedding_norm, image_embedding_norm, dim=-1
|
| 203 |
+
)
|
| 204 |
# Convert to percentage as text
|
| 205 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
| 206 |
|
|
|
|
| 215 |
demo = gr.Interface(
|
| 216 |
fn=generate_embedding,
|
| 217 |
inputs=[
|
| 218 |
+
gr.Textbox(
|
| 219 |
+
lines=5,
|
| 220 |
+
max_lines=5,
|
| 221 |
+
placeholder="Enter Text Here...",
|
| 222 |
+
label="Text to Embed",
|
| 223 |
+
),
|
| 224 |
gr.Image(height=512, type="pil", label="Image to Embed"),
|
| 225 |
],
|
| 226 |
outputs=[
|
|
|
|
| 233 |
description="Generate embeddings using OpenCLIP model for text and images.",
|
| 234 |
allow_flagging="never",
|
| 235 |
batch=False,
|
| 236 |
+
api_name="embed",
|
| 237 |
)
|
| 238 |
|
| 239 |
# Enable queueing and launch the app
|