Spaces:
Build error
Build error
modified file
Browse files- README.md +39 -0
- src/frontend/app.py +37 -10
README.md
CHANGED
|
@@ -10,3 +10,42 @@ pinned: false
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
# clip-search

📸 Image Search with Pinecone and CLIP

## 🎈 Overview

This project implements an image search engine using OpenAI's CLIP model and Pinecone for vector search. The application allows users to search for images either by text or by uploading an image.

## Features

- 🎈 Image-to-Image Search: Upload an image to find visually similar images.
- 🎈 Text-to-Image Search: Enter a text query to find matching images.
- 🎈 Pinecone Integration: Uses Pinecone as a vector database for fast similarity searches.
- 🎈 Streamlit UI: A user-friendly interface for searching images.
- 🎈 Dataset Management: Reads image metadata from a dataset and indexes it into Pinecone.

## How it works

- Users can upload an image, and the system finds visually similar images from the dataset.
- The uploaded image is converted into an embedding using CLIP, and similar images are retrieved from Pinecone.
- Users can enter a text query to find images matching the description.
- The text is converted into an embedding, and Pinecone retrieves the closest image embeddings.
- Uses Pinecone as a high-speed vector database.
- Efficiently stores and retrieves embeddings for similarity searches.
- Simple web-based interface for performing searches.
- Supports both text- and image-based queries.
- Reads image metadata from a dataset (TSV file).
- Extracts image embeddings using CLIP and stores them in Pinecone.
|
src/frontend/app.py
CHANGED
|
@@ -6,18 +6,12 @@ from transformers import AutoProcessor, CLIPModel
|
|
| 6 |
import streamlit as st
|
| 7 |
from utils import logger
|
| 8 |
from database import pinecone_index
|
| 9 |
-
|
| 10 |
-
# from data import data_set
|
| 11 |
|
| 12 |
logger = logger.get_logger()
|
| 13 |
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
| 14 |
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 15 |
|
| 16 |
-
PAGE_TITLE = "Clip Search"
|
| 17 |
-
PAGE_LAYOUT = "wide"
|
| 18 |
-
SIDEBAR_TITLE = "Find Similar Images"
|
| 19 |
-
PHOTO_ID_KEY = "photo_id"
|
| 20 |
-
IMAGE_URL_KEY = "photo_image_url"
|
| 21 |
PINECONE_INDEX = pinecone_index.create_index()
|
| 22 |
|
| 23 |
def search_by_text(query_text, index):
|
|
@@ -27,18 +21,51 @@ def search_by_text(query_text, index):
|
|
| 27 |
results = index.query(vector=query_vector, top_k=10, include_metadata=True, namespace="image-search-dataset")
|
| 28 |
return results
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def main():
|
| 31 |
-
st.set_page_config(page_title=
|
| 32 |
st.title("📸Image Search with Pinecone and CLIP")
|
| 33 |
option = st.selectbox("Choose Input Type", ["Text", "Image Upload"])
|
|
|
|
| 34 |
if option == "Text":
|
| 35 |
user_text = st.text_input("Enter your search text", placeholder = "for eg: dogs or cat etc..")
|
|
|
|
| 36 |
if st.button("Search"):
|
| 37 |
results = search_by_text(user_text, PINECONE_INDEX)
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
|
|
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
if __name__ == "__main__":
|
| 44 |
main()
|
|
|
|
| 6 |
import streamlit as st
|
| 7 |
from utils import logger
|
| 8 |
from database import pinecone_index
|
| 9 |
+
from PIL import Image
|
|
|
|
| 10 |
|
| 11 |
logger = logger.get_logger()
|
| 12 |
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
| 13 |
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
PINECONE_INDEX = pinecone_index.create_index()
|
| 16 |
|
| 17 |
def search_by_text(query_text, index):
|
|
|
|
| 21 |
results = index.query(vector=query_vector, top_k=10, include_metadata=True, namespace="image-search-dataset")
|
| 22 |
return results
|
| 23 |
|
| 24 |
+
def search_by_image(image, index):
    """Find the images in Pinecone most similar to an uploaded image.

    The image is embedded with the module-level CLIP ``model``/``processor``
    and the resulting vector is queried against the "image-search-dataset"
    namespace, returning the top 5 matches with their metadata.
    """
    # Preprocess the PIL image into model-ready tensors.
    model_inputs = processor(images=image, return_tensors="pt")
    # CLIP image embedding -> plain Python list of floats, as Pinecone expects.
    embedding = model.get_image_features(**model_inputs)
    vector = embedding.detach().cpu().numpy().flatten().tolist()
    return index.query(
        vector=vector,
        top_k=5,
        include_metadata=True,
        namespace="image-search-dataset",
    )
|
| 30 |
+
|
| 31 |
def _render_matches(results):
    """Render a Pinecone query result as a two-column grid of images.

    ``results`` is the dict-like response from ``Index.query`` with
    ``include_metadata=True``; each match is expected to carry ``url`` and
    ``photo_id`` in its metadata (set at indexing time — confirm against the
    indexing code).
    """
    columns = st.columns(2)
    for idx, match in enumerate(results['matches']):
        # Alternate matches between the two columns.
        with columns[idx % 2]:
            st.image(
                match['metadata']['url'],
                caption=f"Match: {match['metadata']['photo_id']}",
                width=500
            )

def main():
    """Streamlit entry point: text- or image-based CLIP similarity search UI."""
    st.set_page_config(page_title="Clip Search", layout="wide")
    st.title("📸Image Search with Pinecone and CLIP")
    option = st.selectbox("Choose Input Type", ["Text", "Image Upload"])

    if option == "Text":
        user_text = st.text_input("Enter your search text", placeholder = "for eg: dogs or cat etc..")
        if st.button("Search"):
            # Text query -> CLIP text embedding -> Pinecone lookup.
            _render_matches(search_by_text(user_text, PINECONE_INDEX))
    elif option == "Image Upload":
        uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image")
            if st.button("Search by Image"):
                # Uploaded image -> CLIP image embedding -> Pinecone lookup.
                _render_matches(search_by_image(image, PINECONE_INDEX))

if __name__ == "__main__":
    main()
|