GOWaz committed (verified)
Commit a8e2ab4 · Parent(s): 0bc63f7

Upload 11 files
app.py ADDED
@@ -0,0 +1,102 @@
+ import os
+
+ import gradio as gr
+ from PIL import Image
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from clip_embedding import Clip
+ from efficientnet_embedding import EfficientNet
+ from vit_embedding import Vit
+ from resnet_embedding import Resnet
+ from dino_embedding import Dino
+ from histogram_embedding import cosine, get_embedding
+ from bovw_embedding import Bovw
+
+ resnet = Resnet()
+ vit = Vit()
+ efficientnet = EfficientNet()
+ bovw = Bovw()
+ dino = Dino()
+ clip = Clip()
+
+
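+ # Route to the embedding backend selected in the UI; the torch-based models
+ # return tensors, which are converted to NumPy arrays for scoring.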
+ def get_image_embedding(image: Image.Image, name):
+     match name:
+         case "ResNet":
+             return resnet.get_embedding(image).cpu().numpy()
+         case "VIT":
+             return vit.get_embedding(image).cpu().numpy()
+         case "EfficientNet":
+             return efficientnet.get_embedding(image).cpu().numpy()
+         case "Histogram":
+             return get_embedding(image)
+         case "BOVW":
+             return bovw.get_embedding(image)
+         case "DINO":
+             return dino.get_embedding(image).cpu().numpy()
+         case _:
+             return clip.get_embedding(image).cpu().numpy()
+
+
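+ # Histogram, ResNet, and BOVW produce 1-D vectors, scored with the local
+ # cosine(); the remaining models return (1, D) arrays, scored with
+ # scikit-learn's cosine_similarity.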
+ def compare_images(main_img, compare_imgs, name):
+     results = []
+     if name in ("Histogram", "ResNet", "BOVW"):
+         main_emb = get_image_embedding(main_img, name)
+         for img in compare_imgs:
+             emb = get_image_embedding(img, name)
+             results.append((img, round(cosine(main_emb, emb) * 100, 2)))
+     else:
+         main_embedding = get_image_embedding(main_img, name)
+         for img in compare_imgs:
+             emb = get_image_embedding(img, name)
+             score = cosine_similarity(main_embedding, emb)[0][0]
+             percentage = round(score * 100, 2)
+             results.append((img, percentage))
+
+     # Most similar images first
+     results.sort(key=lambda x: x[1], reverse=True)
+
+     return results
+
+
+ model_list = ["CLIP", "VIT", "EfficientNet", "ResNet", "DINO", "Histogram", "BOVW"]
+
+
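+ # Gradio UI: inputs (main image, comparison files, model choice) on the left;
+ # ranked gallery and score text on the right, inside an "Image Embedding" tab.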
+ with gr.Blocks() as demo:
+     with gr.Tab("Image Embedding"):
+         gr.Markdown("# Image Similarity Finder")
+         gr.Markdown(
+             "Upload a main image and compare it to others. Results show similarity percentages using embeddings.")
+
+         with gr.Row():
+             with gr.Column():
+                 main_image = gr.Image(type="pil", label="Main Image")
+                 compare_images_input = gr.File(file_count="multiple", file_types=["image"], label="Comparison Images")
+                 modelName = gr.Dropdown(model_list, label="Model", value=model_list[0])
+                 submit_btn = gr.Button("Compare")
+
+             with gr.Column():
+                 gallery = gr.Gallery(label="Similarity Results")
+                 similarity_text = gr.Textbox(label="Similarity Scores")
+
+
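+     # Open the uploaded files as PIL images, run the comparison, and format
+     # one "filename -> score" line per result.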
+     def process_comparison(main_img, compare_files, name):
+         compare_imgs = [Image.open(file.name) for file in compare_files]
+         results = compare_images(main_img, compare_imgs, name)
+
+         # Prepare outputs
+         images = [result[0] for result in results]
+         scores = [f"Image: {os.path.basename(result[0].filename)} -> Similarity: {result[1]:.2f}%"
+                   for result in results]
+
+         return images, "\n".join(scores)
+
+
+     submit_btn.click(
+         fn=process_comparison,
+         inputs=[main_image, compare_images_input, modelName],
+         outputs=[gallery, similarity_text]
+     )
+
+ demo.launch()
bovw-codebook.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a62404bfa83e913f1b009be97230b0dc3ae0e54f0ee1a4b06f1ae79a8e35672e
+ size 92383
bovw_embedding.py ADDED
@@ -0,0 +1,32 @@
+ import numpy as np
+ import cv2
+ from PIL import Image
+ import joblib
+ from scipy.cluster.vq import vq
+
+
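+ # Bag of Visual Words: SIFT descriptors are quantized against a pre-computed
+ # codebook and weighted by inverse document frequency.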
+ class Bovw:
+     def __init__(self):
+         self.k, self.codebook = joblib.load("bovw-codebook.pkl")
+         self.idf = np.load("idf.npy")
+         self.sift = cv2.SIFT_create()
+
+     def get_embedding(self, pil_image: Image.Image) -> np.ndarray:
+         img_np = np.array(pil_image.convert("RGB"))
+         img_np = cv2.resize(img_np, (224, 224))
+         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY).astype('uint8')
+
+         keypoints, descriptors = self.sift.detectAndCompute(gray, None)
+
+         if descriptors is None or len(descriptors) == 0:
+             return np.zeros(self.k)  # return zero-vector if no features found
+
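+         # Assign each descriptor to its nearest codebook word, then build a
+         # term-frequency histogram and apply the IDF weights.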
+         visual_words, _ = vq(descriptors, self.codebook)
+
+         freq_vector = np.zeros(self.k)
+         for word in visual_words:
+             freq_vector[word] += 1
+
+         tfidf_vector = freq_vector * self.idf
+
+         return tfidf_vector
clip_embedding.py ADDED
@@ -0,0 +1,16 @@
+ import torch
+ from transformers import CLIPProcessor, CLIPModel
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
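+ # CLIP ViT-B/32 image tower; get_image_features returns a (1, 512) tensor.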
+ class Clip:
+     def __init__(self):
+         self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
+         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+     def get_embedding(self, img):
+         inputs = self.processor(images=img, return_tensors="pt").to(device)
+         with torch.no_grad():
+             embeddings = self.model.get_image_features(**inputs)
+         return embeddings
dino_embedding.py ADDED
@@ -0,0 +1,21 @@
+ import torch
+ from torchvision import transforms
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
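+ # Self-supervised DINO ViT-B/16 from torch.hub; the forward pass returns a
+ # (1, 768) CLS embedding.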
+ class Dino:
+     def __init__(self):
+         self.model = torch.hub.load('facebookresearch/dino:main', 'dino_vitb16').to(device)
+         self.model.eval()
+         self.transform = transforms.Compose([
+             transforms.Resize((224, 224)),
+             transforms.ToTensor(),
+             transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
+         ])
+
+     def get_embedding(self, image):
+         # Move the input to the same device as the model
+         img_tensor = self.transform(image).unsqueeze(0).to(device)
+         with torch.no_grad():
+             embedding = self.model(img_tensor)
+         return embedding
efficientnet_embedding.py ADDED
@@ -0,0 +1,21 @@
+ from torchvision import models, transforms
+ import torch
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
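+ # EfficientNet-B0 with the classifier head replaced by Identity, so the
+ # forward pass yields the 1280-d pooled features.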
+ class EfficientNet:
+     def __init__(self):
+         self.model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
+         self.model.classifier = torch.nn.Identity()
+         self.model.eval()
+         self.transform = transforms.Compose([
+             transforms.Resize((224, 224)),
+             transforms.ToTensor(),
+         ])
+
+     def get_embedding(self, image):
+         # Move the input to the same device as the model
+         img_tensor = self.transform(image).unsqueeze(0).to(device)
+         with torch.no_grad():
+             embedding = self.model(img_tensor)
+         return embedding
histogram_embedding.py ADDED
@@ -0,0 +1,21 @@
+ import cv2
+ import numpy as np
+
+
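+ # Cosine similarity for 1-D vectors; guards against zero-norm inputs.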
+ def cosine(a, b):
+     denom = np.linalg.norm(a) * np.linalg.norm(b)
+     # A zero vector (e.g. BOVW with no SIFT features) would otherwise divide by zero
+     return float(np.dot(a, b) / denom) if denom else 0.0
+
+
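+ # Concatenated per-channel colour histogram: 3 * bins values per image.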
+ def get_embedding(img, bins=32):
+     img = np.array(img)
+     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+
+     img = cv2.resize(img, (224, 224))
+
+     blue = cv2.calcHist([img], [0], None, [bins], [0, 256])
+     green = cv2.calcHist([img], [1], None, [bins], [0, 256])
+     red = cv2.calcHist([img], [2], None, [bins], [0, 256])
+     vector = np.concatenate([blue, green, red], axis=0)
+     vector = vector.flatten()  # Flatten to 1D array
+
+     return vector
idf.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5acb3dab7e07a75564a93afd5ccf808d88ef864518c8da5e4b13dd1798e7642a
+ size 1728
requirements.txt ADDED
Binary file (2.54 kB)
resnet_embedding.py ADDED
@@ -0,0 +1,23 @@
+ import torch
+ import torchvision
+ import torchvision.models as models
+ import torchvision.transforms as transforms
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
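+ # ResNet-50 with the final fc layer stripped; squeeze() turns the
+ # (1, 2048, 1, 1) output into a 1-D 2048-d feature vector.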
+ class Resnet:
+     def __init__(self):
+         self.model = models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT).to(device)
+         self.model = torch.nn.Sequential(*list(self.model.children())[:-1])
+         self.model.eval()
+         self.transform = transforms.Compose([
+             transforms.Resize((224, 224)),
+             transforms.ToTensor(),
+         ])
+
+     def get_embedding(self, image):
+         # Move the input to the same device as the model
+         img_tensor = self.transform(image).unsqueeze(0).to(device)
+         with torch.no_grad():
+             embedding = self.model(img_tensor).squeeze()
+         return embedding
vit_embedding.py ADDED
@@ -0,0 +1,18 @@
+ from transformers import ViTImageProcessor, ViTModel
+ import torch
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
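+ # ViT-B/16 pretrained on ImageNet-21k; the [CLS] token of the last hidden
+ # state is used as a (1, 768) image embedding.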
+ class Vit:
+     def __init__(self):
+         self.model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k").to(device)
+         self.processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
+         self.model.eval()
+
+     def get_embedding(self, image):
+         inputs = self.processor(images=image, return_tensors="pt").to(device)
+         with torch.no_grad():
+             outputs = self.model(**inputs)
+         embedding = outputs.last_hidden_state[:, 0, :]
+         return embedding