import os

import numpy as np
import requests
import streamlit as st
import torch
from PIL import Image
from pinecone import Pinecone
from transformers import AutoProcessor, CLIPModel

# Set page config (must be the first Streamlit command executed).
st.set_page_config(
    page_title="Image Search App",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Initialize Pinecone.
# The API key is read from the environment so the secret never lives in
# source control. Export PINECONE_API_KEY before launching the app.
_pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not _pinecone_api_key:
    st.error(
        "Pinecone API key not configured. "
        "Set the PINECONE_API_KEY environment variable and restart the app."
    )
    # Halt this script run; nothing below can work without an index handle.
    st.stop()

pc = Pinecone(api_key=_pinecone_api_key)
index_name = "unsplash-index"
unsplash_index = pc.Index(index_name)


# Load CLIP model & processor.
@st.cache_resource
def load_clip_model():
    """Load the CLIP model and its processor.

    Decorated with ``st.cache_resource`` so the pretrained weights are
    loaded once and reused rather than reloaded on each script rerun.

    Returns:
        A ``(model, processor)`` tuple for "openai/clip-vit-base-patch32".
    """
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
    return model, processor


model, processor = load_clip_model()

# Sidebar inputs.
st.sidebar.title("🔍 Search Options")
top_k = st.sidebar.slider("🔢 Number of Similar Images", 1, 20, 10)

# Option 1: text-to-image search.
st.sidebar.subheader("📝 Search by Text")
search_query = st.sidebar.text_input(
    "Enter a description (e.g., 'a cute cat', 'a red car')"
)
text_search_btn = st.sidebar.button("🔍 Search by Text")

# Option 2: image-to-image search.
st.sidebar.subheader("🖼️ Search by Image")
uploaded_file = st.sidebar.file_uploader(
    "Upload an image...", type=["jpg", "png", "jpeg"]
)
image_search_btn = st.sidebar.button("🔍 Search by Image")


def get_text_embedding(text: str) -> list[float]:
    """Return the CLIP text embedding of *text* as a flat list of floats.

    Args:
        text: The query string to embed.

    Returns:
        The output of ``model.get_text_features`` flattened to a 1-D list.
    """
    inputs = processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        text_features = model.get_text_features(**inputs)
    return text_features.detach().cpu().numpy().flatten().tolist()


def get_image_embedding(image) -> list[float]:
    """Return the CLIP image embedding of *image* as a flat list of floats.

    Args:
        image: The image to embed, in a form accepted by the CLIP processor
            (the app passes an RGB ``PIL.Image.Image``).

    Returns:
        The output of ``model.get_image_features`` flattened to a 1-D list.
    """
    inputs = processor(images=image, return_tensors="pt")
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
    return image_features.detach().cpu().numpy().flatten().tolist()
def search_similar_images(embedding, top_k=10):
    """Query the Pinecone index for the vectors nearest to *embedding*.

    Args:
        embedding: Query vector as a flat list of floats.
        top_k: Maximum number of matches to request.

    Returns:
        The "matches" entry of the query response, or an empty list when
        the response has none.
    """
    results = unsplash_index.query(
        vector=embedding,
        top_k=top_k,
        include_metadata=True,
        namespace="image-search-dataset",
    )
    return results.get("matches", [])


def _render_matches(matches, n_columns=3):
    """Render search matches in a grid, or a warning when there are none."""
    st.subheader("🔎 Top Similar Images")
    if not matches:
        st.warning("⚠️ No similar images found!")
        return

    cols = st.columns(n_columns)
    for i, match in enumerate(matches):
        # Shown as "cosine distance"; this presumes the index uses the
        # cosine metric, so that 1 - score is a distance -- TODO confirm.
        cosine_distance = 1 - match.get("score", 0)
        photo_id = match.get("id", "Unknown ID")
        # Guard against a match whose metadata is present but None.
        url = (match.get("metadata") or {}).get("url")

        # Fill the grid column by column in round-robin order.
        with cols[i % n_columns]:
            st.write(
                f"📷 **Photo ID**: {photo_id} | 📏 **Cosine Distance**: {cosine_distance:.4f}"
            )
            if url:
                st.image(
                    url,
                    caption=f"Photo ID: {photo_id}",
                    use_container_width=True,
                )
            else:
                st.warning(f"⚠️ Image URL not found for Photo ID: {photo_id}")


def _search_and_render(embed, payload):
    """Embed *payload* with *embed*, query the index, and render the results."""
    with st.spinner("Generating embedding..."):
        embedding = embed(payload)
    with st.spinner("Searching for similar images..."):
        matches = search_similar_images(embedding, top_k=top_k)
    _render_matches(matches)


# Streamlit UI.
st.title("🔍 Image & Text Search with CLIP & Pinecone")

# Process text-to-image search.
if text_search_btn:
    if search_query and search_query.strip():
        _search_and_render(get_text_embedding, search_query)
    else:
        # Give feedback instead of silently ignoring the click.
        st.warning("⚠️ Please enter a description before searching.")

# Process image-to-image search.
if image_search_btn:
    if not uploaded_file:
        # Give feedback instead of silently ignoring the click.
        st.warning("⚠️ Please upload an image before searching.")
    else:
        try:
            image = Image.open(uploaded_file).convert("RGB")
        except OSError:
            # PIL raises an OSError subclass for unreadable/corrupt images.
            st.error("⚠️ Could not read the uploaded file as an image.")
        else:
            st.image(image, caption="Uploaded Image", use_container_width=True)
            _search_and_render(get_image_embedding, image)