# Spaces: tharu22 / image (Sleeping)
# File size: 4,820 Bytes
# a87efec

import streamlit as st
from pinecone import Pinecone
import os
from PIL import Image
import requests
from transformers import AutoProcessor, CLIPModel
import numpy as np
import torch

# ✅ Set Page Config (Must be the first Streamlit command)
st.set_page_config(page_title="Image Search App", layout="wide", initial_sidebar_state="expanded")

# ✅ Initialize Pinecone
# The API key is a secret and must never live in source control, so it is read
# from the PINECONE_API_KEY environment variable instead of being hard-coded.
# NOTE: the key that was previously committed in this file should be treated
# as leaked and revoked/rotated in the Pinecone console.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    # Fail with a visible message instead of an opaque authentication error
    # on the first query.
    st.error("⚠️ PINECONE_API_KEY environment variable is not set.")
    st.stop()

pc = Pinecone(api_key=pinecone_api_key)
index_name = "unsplash-index"
unsplash_index = pc.Index(index_name)

# ✅ Load CLIP Model & Processor
@st.cache_resource
def load_clip_model():
    """Load the CLIP model and its processor from one checkpoint.

    Wrapped in ``st.cache_resource`` so the load is not repeated on every
    script run.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = AutoProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


# Module-level handles used by the embedding helpers below.
model, processor = load_clip_model()

# ✅ Sidebar for Inputs
# All search controls live in the sidebar. The values captured here
# (top_k, search_query, text_search_btn, uploaded_file, image_search_btn)
# are read further down by the search-processing code.
st.sidebar.title("🔍 Search Options")
# Slider arguments are (label, min, max, default): 1 to 20 results, 10 by default.
top_k = st.sidebar.slider("🔢 Number of Similar Images", 1, 20, 10)

# 📌 **Option 1: Text-to-Image Search**
st.sidebar.subheader("📝 Search by Text")
search_query = st.sidebar.text_input("Enter a description (e.g., 'a cute cat', 'a red car')")
# The button value gates the text search; the query alone does not trigger it.
text_search_btn = st.sidebar.button("🔍 Search by Text")

# 📌 **Option 2: Image-to-Image Search**
st.sidebar.subheader("🖼️ Search by Image")
# Only jpg/png/jpeg uploads are accepted by the widget.
uploaded_file = st.sidebar.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
# The button value gates the image search; uploading alone does not trigger it.
image_search_btn = st.sidebar.button("🔍 Search by Image")

# ✅ Function to Generate Embedding from Text
def get_text_embedding(text):
    """Embed a text query with the CLIP text encoder.

    Args:
        text: The query string to embed.

    Returns:
        The text features flattened into a plain Python list.
    """
    encoded = processor(
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        features = model.get_text_features(**encoded)
    as_array = features.detach().cpu().numpy()
    return as_array.flatten().tolist()

# ✅ Function to Generate Embedding from Image
def get_image_embedding(image):
    """Embed an image with the CLIP image encoder.

    Args:
        image: The image to embed, passed to the processor as ``images``.

    Returns:
        The image features flattened into a plain Python list.
    """
    encoded = processor(images=image, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        features = model.get_image_features(**encoded)
    as_array = features.detach().cpu().numpy()
    return as_array.flatten().tolist()

# ✅ Function to Query Pinecone and Fetch Similar Images
def search_similar_images(embedding, top_k=10):
    """Query the Pinecone index for the nearest neighbours of an embedding.

    Args:
        embedding: The query vector.
        top_k: Maximum number of matches to request.

    Returns:
        The ``"matches"`` entry of the query response, or an empty list
        when the response has none.
    """
    query_args = {
        "vector": embedding,
        "top_k": top_k,
        "include_metadata": True,
        "namespace": "image-search-dataset",
    }
    response = unsplash_index.query(**query_args)
    return response.get("matches", [])

# ✅ Streamlit UI
st.title("🔍 Image & Text Search with CLIP & Pinecone")


def _render_matches(matches, num_columns=3):
    """Render search matches as an image grid, or a warning when there are none.

    Args:
        matches: Match records returned by ``search_similar_images``; each is
            read through ``.get`` for ``"score"``, ``"id"`` and ``"metadata"``.
        num_columns: Number of grid columns to spread the images across.
    """
    st.subheader("🔎 Top Similar Images")
    if not matches:
        st.warning("⚠️ No similar images found!")
        return

    cols = st.columns(num_columns)
    for i, match in enumerate(matches):
        # Convert similarity score to cosine distance.
        # NOTE(review): assumes the index uses the cosine metric; confirm.
        cosine_distance = 1 - match.get("score", 0)
        photo_id = match.get("id", "Unknown ID")
        # Metadata may be missing or None; fall back to an empty mapping.
        url = (match.get("metadata") or {}).get("url")

        with cols[i % num_columns]:  # Fill the columns round-robin.
            st.write(f"📷 **Photo ID**: {photo_id} | 📏 **Cosine Distance**: {cosine_distance:.4f}")
            if url:
                st.image(url, caption=f"Photo ID: {photo_id}", use_container_width=True)
            else:
                st.warning(f"⚠️ Image URL not found for Photo ID: {photo_id}")


def _search_and_render(embed, payload):
    """Embed ``payload`` with ``embed``, query the index, and render the results."""
    with st.spinner("Generating embedding..."):
        embedding = embed(payload)
    with st.spinner("Searching for similar images..."):
        matches = search_similar_images(embedding, top_k=top_k)
    _render_matches(matches)


# 📌 **Process Text-to-Image Search**
if text_search_btn:
    if search_query and search_query.strip():
        _search_and_render(get_text_embedding, search_query)
    else:
        # Give feedback instead of silently ignoring the click.
        st.warning("⚠️ Please enter a description before searching.")

# 📌 **Process Image-to-Image Search**
if image_search_btn:
    if uploaded_file is None:
        # Give feedback instead of silently ignoring the click.
        st.warning("⚠️ Please upload an image before searching.")
    else:
        try:
            image = Image.open(uploaded_file).convert("RGB")
        except OSError:
            # Pillow raises OSError (including UnidentifiedImageError) for
            # corrupt or unsupported files.
            st.error("⚠️ The uploaded file could not be read as an image.")
        else:
            st.image(image, caption="Uploaded Image", use_container_width=True)
            _search_and_render(get_image_embedding, image)