# File size: 4,820 Bytes
# a87efec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
from pinecone import Pinecone
import os
from PIL import Image
import requests
from transformers import AutoProcessor, CLIPModel
import numpy as np
import torch

# ✅ Page configuration (must be the first Streamlit command in the script)
st.set_page_config(
    page_title="Image Search App",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ✅ Initialize Pinecone
# The API key is read from the PINECONE_API_KEY environment variable instead of
# being embedded in the source, so the credential never ends up in version
# control. Any key that was ever committed should be treated as leaked and rotated.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    # Fail early with an actionable message rather than an opaque client error.
    st.error("Pinecone API key is not configured. Set the PINECONE_API_KEY environment variable.")
    st.stop()

pc = Pinecone(api_key=pinecone_api_key)
index_name = "unsplash-index"
unsplash_index = pc.Index(index_name)

# ✅ Load CLIP Model & Processor
@st.cache_resource
def load_clip_model():
    """Load the CLIP model and its matching processor.

    Both objects come from the same pretrained checkpoint. The function is
    wrapped in ``st.cache_resource`` so the loaded pair is reused across
    script reruns.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = AutoProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor

# Module-level handles used by the embedding helpers below.
model, processor = load_clip_model()

# ✅ Sidebar for Inputs
# Every widget lives in the sidebar; the values they return drive the two
# search flows further down the script.
st.sidebar.title("🔍 Search Options")
# Slider bounds: at least 1 and at most 20 results, defaulting to 10.
top_k = st.sidebar.slider("🔢 Number of Similar Images", min_value=1, max_value=20, value=10)

# 📌 **Option 1: Text-to-Image Search**
st.sidebar.subheader("📝 Search by Text")
search_query = st.sidebar.text_input("Enter a description (e.g., 'a cute cat', 'a red car')")
text_search_btn = st.sidebar.button("🔍 Search by Text")

# 📌 **Option 2: Image-to-Image Search**
st.sidebar.subheader("🖼️ Search by Image")
uploaded_file = st.sidebar.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
image_search_btn = st.sidebar.button("🔍 Search by Image")

# ✅ Function to Generate Embedding from Text
def get_text_embedding(text):
    """Embed a text string with the CLIP text encoder.

    The text is wrapped in a one-element batch, run through the shared
    ``processor`` (padding and truncation enabled), and passed to
    ``model.get_text_features`` with gradient tracking disabled.

    Args:
        text: The query string to embed.

    Returns:
        The text features flattened into a plain Python list.
    """
    encoded = processor(
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        features = model.get_text_features(**encoded)
    as_array = features.detach().cpu().numpy()
    return as_array.flatten().tolist()

# ✅ Function to Generate Embedding from Image
def get_image_embedding(image):
    """Embed an image with the CLIP image encoder.

    The image is preprocessed by the shared ``processor`` and passed to
    ``model.get_image_features`` with gradient tracking disabled.

    Args:
        image: The image to embed, in a form the processor accepts
            (the caller in this script passes an RGB PIL image).

    Returns:
        The image features flattened into a plain Python list.
    """
    encoded = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        features = model.get_image_features(**encoded)
    as_array = features.detach().cpu().numpy()
    return as_array.flatten().tolist()

# ✅ Function to Query Pinecone and Fetch Similar Images
def search_similar_images(embedding, top_k=10):
    """Query the Pinecone index for the vectors nearest to ``embedding``.

    Args:
        embedding: The query vector.
        top_k: How many matches to request (defaults to 10).

    Returns:
        The ``"matches"`` entry of the query response, or an empty list when
        the response has no such entry.
    """
    query_args = {
        "vector": embedding,
        "top_k": top_k,
        "include_metadata": True,  # metadata is where the image URL is looked up
        "namespace": "image-search-dataset",
    }
    response = unsplash_index.query(**query_args)
    return response.get("matches", [])

# ✅ Streamlit UI
# Main page heading shown above the search results.
st.title("🔍 Image & Text Search with CLIP & Pinecone")

# 📌 **Process Text-to-Image Search**
# Runs only when a query has been entered and the text-search button was
# clicked on this script run.
if search_query and text_search_btn:
    with st.spinner("Generating embedding..."):
        embedding = get_text_embedding(search_query)
    with st.spinner("Searching for similar images..."):
        matches = search_similar_images(embedding, top_k=top_k)

    st.subheader("🔎 Top Similar Images")
    if matches:
        cols = st.columns(3)  # Arrange images in 3 columns
        for i, match in enumerate(matches):
            # The match score is treated as a cosine similarity, so distance = 1 - score.
            cosine_distance = 1 - match.get("score", 0)
            photo_id = match.get("id", "Unknown ID")
            # A match may have no metadata (key missing or explicitly None);
            # fall back to an empty mapping so the URL lookup cannot raise.
            metadata = match.get("metadata") or {}
            url = metadata.get("url")

            with cols[i % 3]:  # Cycle through the columns in order
                st.write(f"📷 **Photo ID**: {photo_id} | 📏 **Cosine Distance**: {cosine_distance:.4f}")
                if url:
                    st.image(url, caption=f"Photo ID: {photo_id}", use_container_width=True)
                else:
                    st.warning(f"⚠️ Image URL not found for Photo ID: {photo_id}")
    else:
        st.warning("⚠️ No similar images found!")

# 📌 **Process Image-to-Image Search**
# Runs only when a file has been uploaded and the image-search button was
# clicked on this script run.
if uploaded_file and image_search_btn:
    # Convert to RGB before showing the upload and embedding it.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_container_width=True)

    with st.spinner("Generating embedding..."):
        embedding = get_image_embedding(image)
    with st.spinner("Searching for similar images..."):
        matches = search_similar_images(embedding, top_k=top_k)

    st.subheader("πŸ”Ž Top Similar Images")
    if matches:
        cols = st.columns(3)  # Arrange in 3 columns
        for i, match in enumerate(matches):
            cosine_distance = 1 - match.get("score", 0)  # Convert similarity score to cosine distance
            photo_id = match.get("id", "Unknown ID")
            # The image URL is looked up under the "url" key of the match metadata.
            url = match.get("metadata", {}).get("url", None)

            with cols[i % 3]:  # Alternate images in columns
                st.write(f"πŸ“· **Photo ID**: {photo_id} | πŸ“ **Cosine Distance**: {cosine_distance:.4f}")
                if url:
                    st.image(url, caption=f"Photo ID: {photo_id}", use_container_width=True)
                else:
                    # No URL in the metadata: show a warning in place of the image.
                    st.warning(f"⚠️ Image URL not found for Photo ID: {photo_id}")