jjderz's picture
Create app.py
080dd7f
import streamlit as s
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline
# Environment setup. The leading "!" is IPython/Jupyter shell-escape syntax, so
# these lines only work when this file is executed as notebook cells.
# NOTE(review): in a plain .py script they are a SyntaxError — confirm how this
# file is meant to be run and move these steps to a setup script if needed.
# Download the LFW face-image archive (lfw.tgz) into the current directory.
!wget http://vis-www.cs.umass.edu/lfw/lfw.tgz
# Unpack the archive. With no -C option tar extracts into the current working
# directory (presumably /content, given the archive path used — TODO confirm).
!tar -xvf /content/lfw.tgz
# Install the third-party packages imported further down (tensorflow, tqdm).
!pip install tensorflow
!pip install tqdm
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.preprocessing import image
from PIL import Image
from tqdm import tqdm
# Shared feature extractor: ResNet50 with ImageNet weights, built without its
# top classification layers and with average pooling on the output.
resnet50_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
)
def get_image_features(img_path):
    """Return the ResNet50 feature vector for the image at ``img_path``.

    The image is loaded and resized to 224x224, converted to an array, given a
    leading batch axis, passed through ``preprocess_input`` and then through
    the module-level ``resnet50_model``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The model output for this single image, flattened to a 1-D array.
    """
    loaded_img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(loaded_img)
    # The model works on batches, so add a leading axis of size 1.
    batch = np.expand_dims(img_array, axis=0)
    batch = preprocess_input(batch)
    # verbose=0: this function is called once per image, and the default
    # verbosity prints a progress bar on every predict() call, which floods
    # the output and garbles any outer progress display.
    features = resnet50_model.predict(batch, verbose=0)
    return features.flatten()
# Compute a feature vector for every .jpg file under the LFW directory tree.
lfw_dir = '/content/lfw'
extracted_features = {}

# First pass: collect the full path of each .jpg file.
all_files = []
for current_dir, _subdirs, names in os.walk(lfw_dir):
    all_files.extend(
        os.path.join(current_dir, name)
        for name in names
        if name.endswith('.jpg')
    )

# Second pass: extract features, keyed by bare file name (not full path).
for jpg_path in tqdm(all_files, desc="Processing images"):
    extracted_features[os.path.basename(jpg_path)] = get_image_features(jpg_path)
from sklearn.neighbors import NearestNeighbors
def find_similar_images(target_image, feature_dictionary, num_neighbors=10):
    """Return the names of the images whose features are nearest to a target.

    A Euclidean nearest-neighbour index is fitted on every feature vector in
    ``feature_dictionary`` and queried with the vector stored under
    ``target_image``.

    Args:
        target_image: Key in ``feature_dictionary`` identifying the query image.
        feature_dictionary: Mapping from image name to its 1-D feature vector.
        num_neighbors: Maximum number of names to return. Capped at the number
            of entries in ``feature_dictionary``.

    Returns:
        A list of image names as returned by the nearest-neighbour query
        (nearest first). The target is itself part of the search set, so it
        is normally included in the result.

    Raises:
        ValueError: If ``feature_dictionary`` is empty.
        KeyError: If ``target_image`` is not a key of ``feature_dictionary``.
    """
    if not feature_dictionary:
        raise ValueError("feature_dictionary is empty; nothing to search")

    img_names = list(feature_dictionary)
    features_list = np.array([feature_dictionary[name] for name in img_names])

    # Asking for more neighbours than there are fitted samples makes the
    # query raise, so cap the request at the number of available images.
    neighbor_count = min(num_neighbors, len(img_names))
    neighbors_model = NearestNeighbors(
        n_neighbors=neighbor_count, algorithm='auto', metric='euclidean'
    )
    neighbors_model.fit(features_list)

    # kneighbors() expects a 2-D array: one row per query point.
    target_image_feature = feature_dictionary[target_image].reshape(1, -1)
    _, img_indices = neighbors_model.kneighbors(target_image_feature)
    return [img_names[index] for index in img_indices.flatten()]
# Look up the images most similar to one example query and print them.
query_img = 'Francis_Ricciardone_0001.jpg'
num_results = 10

# The query image is part of the search set, so request one extra neighbour
# to leave room for removing it from the results.
similar_imgs = find_similar_images(
    query_img, extracted_features, num_neighbors=num_results + 1
)
# Drop the query by name rather than by position: if another image has an
# identical feature vector, the query is not guaranteed to be the first hit.
matches = [name for name in similar_imgs if name != query_img][:num_results]

print(f"Images similar to {query_img}:")
for count, img_name in enumerate(matches, start=1):
    print(f"{count}: {img_name}")