kotlarska2 committed on
Commit
28bc16c
·
verified ·
1 Parent(s): 0d36995

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +194 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ from transformers import CLIPProcessor, CLIPModel
4
+ from pathlib import Path
5
+ from torch.utils.data import Dataset, DataLoader
6
+ import os
7
+ import numpy as np
8
+ from numpy.linalg import norm
9
+ import matplotlib.pyplot as plt
10
+ import gradio as gr
11
+
12
+ # Cast 1 -------------------------------------------------------------------------
13
+
14
_CLIP_MODEL_NAME = "openai/clip-vit-base-patch32"

# Loaded (model, processor) pairs keyed by checkpoint name, so the weights are
# read once per process instead of once per call.
_CLIP_CACHE = {}


def _load_clip(model_name=_CLIP_MODEL_NAME):
    """Return a cached ``(model, processor)`` pair for *model_name*."""
    if model_name not in _CLIP_CACHE:
        model = CLIPModel.from_pretrained(model_name)
        model.eval()
        processor = CLIPProcessor.from_pretrained(model_name)
        _CLIP_CACHE[model_name] = (model, processor)
    return _CLIP_CACHE[model_name]


def get_clip_embeddings(input_data, input_type='text'):
    """Compute CLIP features for a text or an image.

    Args:
        input_data: For ``input_type='text'``, the text passed to the CLIP
            processor. For ``input_type='image'``, either a file path
            (``str``) or a ``PIL.Image.Image``.
        input_type: ``'text'`` or ``'image'``.

    Returns:
        The model's feature tensor converted to a NumPy array
        (presumably one row per input; confirm against the CLIP docs).

    Raises:
        ValueError: If ``input_type`` is not ``'text'``/``'image'``, or if an
            image input is neither a path nor a PIL image.
    """
    # Validate before touching the model so bad arguments fail fast and cheap.
    if input_type not in ('text', 'image'):
        raise ValueError("Invalid input_type. Choose 'text' or 'image'")

    image = None
    if input_type == 'image':
        if isinstance(input_data, str):
            # convert() returns an in-memory copy, so the file handle can be
            # released as soon as the ``with`` block exits.
            with Image.open(input_data) as opened:
                image = opened.convert("RGB")
        elif isinstance(input_data, Image.Image):
            image = input_data
        else:
            raise ValueError("For image input, provide either a file path or a PIL Image object")

    model, processor = _load_clip()

    if input_type == 'text':
        inputs = processor(text=input_data, return_tensors="pt", padding=True, truncation=True)
    else:
        inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients needed.
    with torch.no_grad():
        if input_type == 'text':
            embeddings = model.get_text_features(**inputs)
        else:
            embeddings = model.get_image_features(**inputs)

    return embeddings.numpy()
41
+
42
+ # Cast 2 -------------------------------------------------------------------------
43
+
44
class ImageDataset(Dataset):
    """Dataset yielding CLIP-preprocessed pixel tensors for a folder of images.

    Files are taken in ``os.listdir`` order and kept when their name ends with
    ``.png``, ``.jpg`` or ``.jpeg``. Code that maps dataset indices back to
    file names must list the folder the same way.

    Args:
        image_dir: Directory to scan (not recursive).
        processor: Callable used as ``processor(images=..., return_tensors="pt")``
            whose result exposes ``['pixel_values']``.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, image_dir, processor):
        self.image_paths = [
            os.path.join(image_dir, name)
            for name in os.listdir(image_dir)
            if name.endswith(self.IMAGE_EXTENSIONS)
        ]
        self.processor = processor

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the processed ``pixel_values`` entry for image ``idx``."""
        # Close the file handle deterministically; convert() yields an
        # in-memory copy that stays valid after the file is closed.
        with Image.open(self.image_paths[idx]) as image:
            rgb_image = image.convert("RGB")
        return self.processor(images=rgb_image, return_tensors="pt")['pixel_values'][0]
55
+
56
def get_clip_embeddings_batch(image_dir, batch_size=32, device='cuda', num_workers=4):
    """Compute CLIP image features for every image in a folder.

    Args:
        image_dir: Folder handed to ``ImageDataset``.
        batch_size: Number of images per forward pass.
        device: Torch device the model and batches are moved to.
        num_workers: Number of ``DataLoader`` worker processes.

    Returns:
        A NumPy array with the per-batch feature arrays concatenated in
        dataset order (``shuffle=False``).

    Raises:
        ValueError: If the dataset built from ``image_dir`` is empty.
    """
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    dataset = ImageDataset(image_dir, processor)

    # Fail early with a clear message: np.concatenate on an empty list would
    # otherwise raise an opaque error after the model had already been loaded.
    if len(dataset) == 0:
        raise ValueError(f"No images found in {image_dir!r}")

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    model.eval()

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,  # keep row i of the result aligned with dataset item i
        num_workers=num_workers,
    )

    feature_batches = []
    with torch.no_grad():
        for pixel_values in dataloader:
            features = model.get_image_features(pixel_values=pixel_values.to(device))
            feature_batches.append(features.cpu().numpy())

    return np.concatenate(feature_batches)
75
+
76
+ # Cast 3 -------------------------------------------------------------------------
77
+
78
# Cosine similarity between two vectors.
def cosine_similarity(x, y):
    """Return ``dot(x, y) / (norm(x) * norm(y))``.

    When either input has zero norm the similarity is undefined; this returns
    zeros shaped like ``np.dot(x, y)`` instead of NaN. NaN values sort last in
    ``np.argsort`` and would be picked as the "most similar" entries.
    """
    dot = np.dot(x, y)
    denominator = norm(x) * norm(y)
    if denominator == 0:
        return np.zeros(np.shape(dot))
    return dot / denominator
81
+
82
# Find the indices of the images most similar to a text query.
def maxCS_indices(text_input, embeddings, top_k=4):
    """Return indices of the ``top_k`` rows of ``embeddings`` closest to the text.

    Args:
        text_input: Text passed to ``get_clip_embeddings(..., input_type='text')``.
        embeddings: 2-D array with one image embedding per row.
        top_k: How many indices to return per query (default 4).

    Returns:
        Integer array of shape ``(top_k, n_queries)`` (fewer rows if there are
        fewer embeddings). Each column is ordered by ascending cosine
        similarity, so the best match is the last row.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    queries = np.atleast_2d(
        np.asarray(get_clip_embeddings(text_input, input_type='text'), dtype=np.float64)
    )
    candidates = np.asarray(embeddings, dtype=np.float64)

    # All cosine similarities at once: (n_images, n_queries).
    dots = candidates @ queries.T
    scale = norm(candidates, axis=1, keepdims=True) * norm(queries, axis=1)
    # Zero-norm vectors get similarity 0 instead of NaN, which argsort would
    # otherwise place last and so report as a top match.
    similarities = np.divide(dots, scale, out=np.zeros_like(dots), where=scale != 0)

    return np.argsort(similarities, axis=0)[-top_k:]
105
+
106
+ # Cast 4 -------------------------------------------------------------------------
107
+
108
def which_images(images_folder, indices):
    """Map embedding indices back to image file names.

    The folder is listed in ``os.listdir`` order with the same extension
    filter that ``ImageDataset`` uses (``.png``, ``.jpg``, ``.jpeg``), so
    position ``i`` here is the file behind embedding row ``i``. Leaving out
    ``.jpeg`` would shift every later index onto the wrong file.

    Args:
        images_folder: Directory containing the images.
        indices: Integer index or index array of any shape.

    Returns:
        1-D NumPy array of the selected file names.
    """
    image_filenames = [
        name
        for name in os.listdir(images_folder)
        if name.endswith(('.png', '.jpg', '.jpeg'))
    ]
    # Index with the (possibly 2-D) index array, then flatten to a flat vector.
    return np.array(image_filenames)[indices].flatten()
124
+
125
+ # Cast 5 -------------------------------------------------------------------------
126
+
127
def display_images(folder_path, image_names):
    """Show the named images side by side in one row with ``plt.show()``.

    Args:
        folder_path: Directory containing the images.
        image_names: Iterable of file names relative to ``folder_path``.

    The grid has one column per image (four images give the 20x5 inch
    figure), so no image is silently dropped and no empty framed axes remain.
    """
    folder = Path(folder_path)
    names = list(image_names)

    # squeeze=False keeps ``axes`` 2-D even for a single column.
    n_cols = max(len(names), 1)
    fig, axes = plt.subplots(1, n_cols, figsize=(5 * n_cols, 5), squeeze=False)

    for ax, img_name in zip(axes[0], names):
        # Close the file as soon as its pixels have been handed to matplotlib.
        with Image.open(folder / img_name) as img:
            ax.imshow(img)
        ax.set_title(img_name)  # file name as the subplot title

    # Hide ticks and frames on every subplot, including unused ones.
    for ax in axes[0]:
        ax.axis('off')

    plt.show()
147
+
148
+ # Cast 6 -------------------------------------------------------------------------
149
+
150
# Shared state for process_input: image folder, compute device and the
# image embeddings computed once at start-up.
images_folder = "kotlarska2/Trains"

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

embeddings = get_clip_embeddings_batch(images_folder, batch_size=32, device=device)
154
+
155
# Main handler: turn a text query into a rendered image of the best matches.
def process_input(text_input):
    """Return the path of an image showing the pictures that best match the text.

    Looks up the closest embedding indices, maps them to file names in
    ``images_folder`` and renders them with ``display_our_images``.
    """
    top_indices = maxCS_indices(text_input, embeddings)
    matching_names = which_images(images_folder, top_indices)
    return display_our_images(images_folder, matching_names)
160
+
161
# Render the selected images into a single PNG file.
def display_our_images(folder_path, image_names):
    """Draw the named images in one row and save them to ``output_images.png``.

    Args:
        folder_path: Directory containing the images.
        image_names: Iterable of file names relative to ``folder_path``.

    Returns:
        The string ``'output_images.png'``, the path of the saved figure.

    The grid has one column per image (four images give the 20x5 inch
    figure), so no image is silently dropped and no empty framed axes remain.

    NOTE(review): the output path is fixed, so overlapping calls overwrite
    each other's file.
    """
    folder = Path(folder_path)
    names = list(image_names)

    # squeeze=False keeps ``axes`` 2-D even for a single column.
    n_cols = max(len(names), 1)
    fig, axes = plt.subplots(1, n_cols, figsize=(5 * n_cols, 5), squeeze=False)

    for ax, img_name in zip(axes[0], names):
        # Close the file as soon as its pixels have been handed to matplotlib.
        with Image.open(folder / img_name) as img:
            ax.imshow(img)
        ax.set_title(img_name)  # file name as the subplot title

    # Hide ticks and frames on every subplot, including unused ones.
    for ax in axes[0]:
        ax.axis('off')

    # Work on this figure explicitly rather than pyplot's implicit "current"
    # figure, and always release it so figures do not pile up across calls.
    fig.tight_layout()
    fig.savefig('output_images.png')
    plt.close(fig)

    return 'output_images.png'
186
+
187
# Build the Gradio interface (text in, image out) and start serving it.
iface = gr.Interface(
    title="Image Similarity",
    description="Zadaj text a zobrazia sa 4 najpodobnejšie obrázky z našej databázy SUV vozidiel.",
    inputs="text",
    outputs="image",
    fn=process_input,
)
iface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.26.4
2
+ scipy==1.11.4
3
+ scikit-learn==1.3.2
4
+ fastai==2.7.17
5
+ gradio==4.44.1
6
+ timm==1.0.9
7
+ torch==2.2.1
8
+ torchvision==0.17.1
9
+ transformers==4.29.0
10
+ Pillow==9.4.0
11
+ setuptools