File size: 2,324 Bytes
080dd7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as s
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline

import subprocess
import sys

# Fetch and unpack the LFW archive, then install the runtime dependencies.
# These steps go through subprocess rather than IPython "!" shell escapes so
# the file is valid Python when executed outside a notebook.
# check=True stops the script at the first failing step, since everything
# below depends on the dataset and the packages being present.
# NOTE(review): wget saves into the current working directory while tar reads
# /content/lfw.tgz, so this only lines up when the cwd is /content -- confirm.
subprocess.run(["wget", "http://vis-www.cs.umass.edu/lfw/lfw.tgz"], check=True)
subprocess.run(["tar", "-xvf", "/content/lfw.tgz"], check=True)

# Invoke pip through the running interpreter so the packages are installed
# into the same environment that performs the imports below.
subprocess.run([sys.executable, "-m", "pip", "install", "tensorflow"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "tqdm"], check=True)

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.preprocessing import image
from PIL import Image
from tqdm import tqdm

# Shared feature extractor: ResNet50 with ImageNet weights, built without its
# top layers (include_top=False) and with pooling='avg'.
resnet50_model = ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)

def get_image_features(img_path):
    """Return the ResNet50 feature vector for the image at ``img_path``.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, run through ``preprocess_input`` and fed to the module-level
    ``resnet50_model``. The prediction is flattened before it is returned.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The model output for this single image as a flattened (1-D) array.
    """
    loaded_img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(loaded_img)
    # The model is fed a batch, so wrap the single image in a batch of one.
    expanded_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_array)
    # verbose=0: this function is called once per image, and the default
    # verbosity prints a Keras progress bar for every call, which buries the
    # caller's own progress reporting.
    features = resnet50_model.predict(preprocessed_img, verbose=0)
    return features.flatten()

# Directory that is searched recursively for images.
lfw_dir = '/content/lfw'

# Collect the path of every '.jpg' file below lfw_dir, in os.walk order.
all_files = []
for folder, _subfolders, entries in os.walk(lfw_dir):
    all_files.extend(
        os.path.join(folder, entry)
        for entry in entries
        if entry.endswith('.jpg')
    )

# Maps each image's base file name to the vector from get_image_features.
extracted_features = {}
for jpg_path in tqdm(all_files, desc="Processing images"):
    extracted_features[os.path.basename(jpg_path)] = get_image_features(jpg_path)

from sklearn.neighbors import NearestNeighbors

def find_similar_images(target_image, feature_dictionary, num_neighbors=10):
    """Return the names of the images whose features are closest to a target.

    A Euclidean ``NearestNeighbors`` index is fitted on every vector in
    ``feature_dictionary`` and queried with the vector stored under
    ``target_image``. Because the target is itself part of the fitted data,
    it normally appears among the returned names.

    Args:
        target_image: Key in ``feature_dictionary`` identifying the query.
        feature_dictionary: Mapping of image name to feature vector.
        num_neighbors: Maximum number of names to return. It is capped at the
            number of entries in ``feature_dictionary``.

    Returns:
        A list of image names in the order reported by ``kneighbors``.

    Raises:
        KeyError: If ``target_image`` is not a key of ``feature_dictionary``.
    """
    # Look the target up first so an unknown name fails before the index is
    # built rather than after.
    target_image_feature = feature_dictionary[target_image].reshape(1, -1)

    img_names = list(feature_dictionary.keys())
    features_list = np.array([feature_dictionary[name] for name in img_names])

    # kneighbors raises ValueError when asked for more neighbours than there
    # are fitted samples, so never request more than are available.
    neighbor_count = min(num_neighbors, len(img_names))
    neighbors_model = NearestNeighbors(n_neighbors=neighbor_count, algorithm='auto', metric='euclidean')
    neighbors_model.fit(features_list)

    _, img_indices = neighbors_model.kneighbors(target_image_feature)
    retrieved_images = [img_names[index] for index in img_indices.flatten()]
    return retrieved_images

query_img = 'Francis_Ricciardone_0001.jpg'
# Request one more neighbour than is displayed: the query image is part of
# the searched set, so it occupies one of the returned slots itself.
similar_imgs = find_similar_images(query_img, extracted_features, num_neighbors=11)

print(f"Images similar to {query_img}:")
# Remove the query by name rather than by position. When another image has an
# identical feature vector the distances tie, and the query is then not
# guaranteed to be the first entry.
matches = [name for name in similar_imgs if name != query_img][:10]
for count, img_name in enumerate(matches, start=1):
    print(f"{count}: {img_name}")