molehh committed on
Commit
fe1a29a
·
1 Parent(s): 1f13a56

modified file

Browse files
Files changed (2) hide show
  1. README.md +39 -0
  2. src/frontend/app.py +37 -10
README.md CHANGED
@@ -10,3 +10,42 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ # clip-search
15
+ 📸Image Search with Pinecone and CLIP
16
+
17
+ 🎈Overview
18
+
19
+ ♦This project implements an image search engine using OpenAI's CLIP model and Pinecone for vector search. The application allows users to search for images either by text or by uploading an image.
20
+
21
+ Features
22
+
23
+ 🎈Image-to-Image Search: Upload an image to find visually similar images.
24
+
25
+ 🎈Text-to-Image Search: Enter a text query to find matching images.
26
+
27
+ 🎈Pinecone Integration: Uses Pinecone as a vector database for fast similarity searches.
28
+
29
+ 🎈Streamlit UI: A user-friendly interface for searching images.
30
+
31
+ 🎈Dataset Management: Reads image metadata from a dataset and indexes it into Pinecone.
32
+
33
+ ♦Users can upload an image, and the system finds visually similar images from the dataset.
34
+
35
+ ♦The uploaded image is converted into an embedding using CLIP, and similar images are retrieved from Pinecone.
36
+
37
+ ♦Users can enter a text query to find images matching the description.
38
+
39
+ ♦The text is converted into an embedding, and Pinecone retrieves the closest image embeddings.
40
+
41
+ ♦Uses Pinecone as a high-speed vector database.
42
+
43
+ ♦Efficiently stores and retrieves embeddings for similarity searches.
44
+
45
+ ♦Simple web-based interface for performing searches.
46
+
47
+ ♦Supports both text and image-based queries.
48
+
49
+ ♦Reads image metadata from a dataset (TSV file).
50
+
51
+ ♦Extracts image embeddings using CLIP and stores them in Pinecone.
src/frontend/app.py CHANGED
@@ -6,18 +6,12 @@ from transformers import AutoProcessor, CLIPModel
6
  import streamlit as st
7
  from utils import logger
8
  from database import pinecone_index
9
- # from model.clip_model import ClipModel
10
- # from data import data_set
11
 
12
  logger = logger.get_logger()
13
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
14
  processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
15
 
16
- PAGE_TITLE = "Clip Search"
17
- PAGE_LAYOUT = "wide"
18
- SIDEBAR_TITLE = "Find Similar Images"
19
- PHOTO_ID_KEY = "photo_id"
20
- IMAGE_URL_KEY = "photo_image_url"
21
  PINECONE_INDEX = pinecone_index.create_index()
22
 
23
  def search_by_text(query_text, index):
@@ -27,18 +21,51 @@ def search_by_text(query_text, index):
27
  results = index.query(vector=query_vector, top_k=10, include_metadata=True, namespace="image-search-dataset")
28
  return results
29
 
 
 
 
 
 
 
 
30
  def main():
31
- st.set_page_config(page_title=PAGE_TITLE, layout=PAGE_LAYOUT)
32
  st.title("📸Image Search with Pinecone and CLIP")
33
  option = st.selectbox("Choose Input Type", ["Text", "Image Upload"])
 
34
  if option == "Text":
35
  user_text = st.text_input("Enter your search text", placeholder = "for eg: dogs or cat etc..")
 
36
  if st.button("Search"):
37
  results = search_by_text(user_text, PINECONE_INDEX)
38
- for match in results['matches']:
39
- st.image(match['metadata']['url'], caption=f"Match: {match['metadata']['photo_id']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
41
 
 
 
 
 
 
 
 
42
 
43
  if __name__ == "__main__":
44
  main()
 
6
  import streamlit as st
7
  from utils import logger
8
  from database import pinecone_index
9
+ from PIL import Image
 
10
 
11
  logger = logger.get_logger()
12
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
13
  processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
14
 
 
 
 
 
 
15
  PINECONE_INDEX = pinecone_index.create_index()
16
 
17
  def search_by_text(query_text, index):
 
21
  results = index.query(vector=query_vector, top_k=10, include_metadata=True, namespace="image-search-dataset")
22
  return results
23
 
24
def search_by_image(image, index):
    """Find images in the Pinecone index visually similar to *image*.

    Args:
        image: A PIL image (anything the CLIP processor accepts as ``images=``).
        index: A Pinecone index handle supporting ``query``.

    Returns:
        The raw Pinecone query response; ``results['matches']`` holds up to
        5 hits with their metadata included.
    """
    import torch  # local import: torch is already a transitive dep of transformers/CLIP

    inputs = processor(images=image, return_tensors="pt")
    # Inference only — no_grad avoids building an autograd graph per query,
    # saving memory and time on every search.
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
    # Pinecone expects a flat Python list of floats as the query vector.
    query_vector = image_features.detach().cpu().numpy().flatten().tolist()
    # NOTE(review): top_k=5 here vs top_k=10 in search_by_text — confirm this
    # asymmetry is intentional.
    results = index.query(vector=query_vector, top_k=5, include_metadata=True, namespace="image-search-dataset")
    return results
30
+
31
def _display_results(results):
    """Render Pinecone matches as an alternating two-column grid of images."""
    columns = st.columns(2)
    for idx, match in enumerate(results['matches']):
        with columns[idx % 2]:
            st.image(
                match['metadata']['url'],
                caption=f"Match: {match['metadata']['photo_id']}",
                width=500
            )


def main():
    """Streamlit entry point: text- or image-based similarity search UI.

    Lets the user pick an input mode, runs the matching search against the
    module-level PINECONE_INDEX, and renders the results in two columns.
    """
    st.set_page_config(page_title="Clip Search", layout="wide")
    st.title("📸Image Search with Pinecone and CLIP")
    option = st.selectbox("Choose Input Type", ["Text", "Image Upload"])

    if option == "Text":
        user_text = st.text_input("Enter your search text", placeholder = "for eg: dogs or cat etc..")

        if st.button("Search"):
            results = search_by_text(user_text, PINECONE_INDEX)
            # Rendering extracted to _display_results: it was duplicated
            # verbatim in both branches.
            _display_results(results)

    elif option == "Image Upload":
        uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image")
            if st.button("Search by Image"):
                results = search_by_image(image, PINECONE_INDEX)
                _display_results(results)
69
 
70
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()