tharu22 committed on
Commit
a584599
·
1 Parent(s): f3631d8
Files changed (1) hide show
  1. app.py +70 -107
app.py CHANGED
@@ -1,119 +1,82 @@
1
- import os
2
  import streamlit as st
3
- import requests
4
- import torch
5
- import pinecone
6
- import numpy as np
7
- from io import BytesIO
8
  from PIL import Image
 
9
  from transformers import AutoProcessor, CLIPModel
10
- import logging
11
- import time
12
-
13
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Initialize Pinecone.
# SECURITY: the API key must come from the environment, never from source.
# NOTE(review): a literal key was previously committed on this line; treat it
# as leaked and rotate it in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    st.error("PINECONE_API_KEY is not set. Add it to the environment and reload the app.")
    st.stop()

index_name = "unsplash-index"

# Connect to Pinecone
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# Create the index on first run, then poll until Pinecone reports it ready.
existing_indexes = [index.name for index in pc.list_indexes()]
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        metric="cosine",
        dimension=512,
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status.get("ready", False):
        logger.info("Waiting for index to be ready...")
        time.sleep(1)

# Connect to Pinecone index
index = pc.Index(index_name)
 
 
39
 
40
# Load CLIP model
@st.cache_resource
def load_clip():
    """Load the pretrained CLIP model and its processor.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of reloading them on each script run.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = AutoProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


model, processor = load_clip()
48
-
49
# Streamlit UI


def _to_vector(features):
    """Flatten a CLIP feature tensor into a plain list for a Pinecone query."""
    return features.detach().cpu().numpy().flatten().tolist()


def _render_matches(search_results, caption_prefix, empty_message):
    """Render each Pinecone match, or show ``empty_message`` when there are none.

    Args:
        search_results: Response returned by ``index.query``.
        caption_prefix: Text placed before the match ID in each image caption.
        empty_message: Warning shown when the response has no matches.
    """
    matches = search_results["matches"] if search_results else None
    if not matches:
        st.warning(empty_message)
        return

    for match in matches:
        match_id = match.get("id", "Unknown ID")
        match_score = match.get("score", 0)
        # A record may carry no metadata at all; fall back to an empty dict
        # so the URL lookup below cannot fail on None.
        metadata = match.get("metadata", {}) or {}

        image_url = metadata.get("url", None)
        if image_url:
            st.write(f"🔹 **Match Score:** {match_score}")
            st.image(image_url, caption=f"{caption_prefix} - {match_id}")
        else:
            st.write(f"🔹 Match ID: {match_id} (No Image URL Available)")


st.title("🔍 Image & Text Search with CLIP & Pinecone")

# Option 1: upload an image and search for similar ones
st.subheader("📤 Upload an Image to Search")
uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Convert file to Image
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Process image with CLIP; no gradients are needed for inference.
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
    embeddings = _to_vector(image_features)

    st.subheader("🔎 Find Similar Images")
    if st.button("Search Similar Images"):
        search_results = index.query(vector=embeddings, top_k=5, include_metadata=True)
        _render_matches(search_results, "Similar Image", "No similar images found.")

# Option 2: text search
st.subheader("🔍 Search Images with Text")
text_query = st.text_input("Enter a description (e.g., 'a cute cat' or 'a red car')")

if text_query and st.button("Search with Text"):
    # Truncate so a long description cannot exceed CLIP's text context length.
    inputs = processor(text=[text_query], return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        text_features = model.get_text_features(**inputs)
    text_embeddings = _to_vector(text_features)

    search_results = index.query(vector=text_embeddings, top_k=5, include_metadata=True)
    _render_matches(search_results, "Matched Image", "No matching images found.")
 
 
 
 
 
 
1
  import streamlit as st
2
+ from pinecone import Pinecone
3
+ from dotenv import load_dotenv
4
+ import os
 
 
5
  from PIL import Image
6
+ import requests
7
  from transformers import AutoProcessor, CLIPModel
8
+ import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
# Load environment variables (python-dotenv reads a local .env file if present)
load_dotenv()

# Initialize Pinecone.
# Fail with a readable message when the key is missing, instead of handing
# None to the client and surfacing a traceback to the user.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    st.error("PINECONE_API_KEY is not set. Add it to the environment or a .env file and reload the app.")
    st.stop()

pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "image-index-50000"
unsplash_index = pc.Index(index_name)
17
 
18
# Load CLIP model and processor
@st.cache_resource
def load_clip_model():
    """Load the pretrained CLIP model and its processor.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of reloading them on each script run.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = AutoProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


model, processor = load_clip_model()
26
+
27
# Function to generate embedding from text
def get_text_embedding(text):
    """Return the CLIP text embedding for ``text`` as a flat list of floats.

    Args:
        text: The search phrase to embed. It is tokenized with padding and
            truncation enabled.

    Returns:
        The text features flattened into a plain Python list, ready to be
        passed as the ``vector`` of a Pinecone query.
    """
    # Local import: this module does not import torch at the top level.
    import torch

    inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        text_features = model.get_text_features(**inputs)
    embedding = text_features.detach().cpu().numpy().flatten().tolist()
    return embedding
33
+
34
# Function to query Pinecone and fetch similar images
def search_similar_images(embedding, top_k=10):
    """Query the Pinecone index with ``embedding`` and return its matches.

    Args:
        embedding: Query vector, as a list of floats.
        top_k: Number of matches to request.

    Returns:
        The ``"matches"`` entry of the query response; metadata is requested
        for every match.
    """
    query_kwargs = {
        "vector": embedding,
        "top_k": top_k,
        "include_metadata": True,
        "namespace": "image-search-dataset",
    }
    response = unsplash_index.query(**query_kwargs)
    return response["matches"]
43
+
44
# Streamlit UI
# Upper bound, in seconds, on each image download so one slow host cannot
# hang the whole results page.
IMAGE_FETCH_TIMEOUT_SECONDS = 10

st.title("🔍Text-to-Image Lookup")
st.write("Enter a description to find similar images!")

# Text input widget
search_query = st.text_input("Enter your search query (e.g.Flower)")

# Search button
if st.button("Search"):
    # Treat whitespace-only input as empty so a blank query is never embedded.
    query = search_query.strip()
    if query:
        # Generate embedding from text
        with st.spinner("Generating embedding..."):
            embedding = get_text_embedding(query)

        # Search for similar images
        with st.spinner("Searching for similar images..."):
            matches = search_similar_images(embedding, top_k=10)

        # Display results
        st.subheader("Top Similar Images")
        if not matches:
            st.warning("No similar images found.")

        for match in matches:
            score = match["score"]
            photo_id = match["id"]
            # A record without metadata or without a URL must not abort the
            # whole results loop.
            try:
                url = match["metadata"]["url"]
            except (KeyError, TypeError):
                url = None

            st.write(f"**Photo ID**: {photo_id} | **Similarity Score**: {score:.4f}")
            if not url:
                st.warning(f"No image URL available for photo {photo_id}.")
                continue

            try:
                # Fetch and display the image. The context manager releases
                # the streamed connection even if decoding fails.
                with requests.get(url, stream=True, timeout=IMAGE_FETCH_TIMEOUT_SECONDS) as response:
                    # Surface HTTP errors instead of decoding an error page.
                    response.raise_for_status()
                    response.raw.decode_content = True  # Handle content-encoding
                    img = Image.open(response.raw)
                    # Render while the stream is still open: PIL may defer
                    # reading pixel data until the image is used.
                    st.image(img, caption=f"Photo ID: {photo_id}", use_container_width=True)
            except Exception as e:
                # Best-effort per image: report the failure and continue.
                st.error(f"Could not load image from {url}: {e}")
    else:
        st.warning("Please enter a search query!")

# Instructions
st.write("---")
st.write("Note: This app searches an Unsplash dataset indexed in Pinecone using CLIP embeddings based on your text description.")