Spaces:

tharu22
/

clip

Running

App Files Files Community

tharu22 committed on Feb 27, 2025

Commit

a584599

1 Parent(s): f3631d8

mes

Browse files

Files changed (1) hide show

app.py +70 -107

app.py CHANGED Viewed

@@ -1,119 +1,82 @@
-import os
 import streamlit as st
-import requests
-import torch
-import pinecone
-import numpy as np
-from io import BytesIO
 from PIL import Image
 from transformers import AutoProcessor, CLIPModel
-import logging
-import time
-# ✅ Configure Logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-logger = logging.getLogger(__name__)
-# ✅ Initialize Pinecone
-PINECONE_API_KEY = "pcsk_6r4DPn_4P9LckhZak3PhebvSebnEBKQZuzYFeJL2X93LtLxZVBxyJ93inBAktefa8usvJC"  # Replace with your API Key
-index_name = "unsplash-index"
-# ✅ Connect to Pinecone
-pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
-# ✅ Check if the index exists, otherwise create it
-existing_indexes = [index.name for index in pc.list_indexes()]
-if index_name not in existing_indexes:
-    pc.create_index(
-        name=index_name,
-        metric="cosine",
-        dimension=512,
-        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
-    )
-    while not pc.describe_index(index_name).status.get("ready", False):
-        logger.info("Waiting for index to be ready...")
-        time.sleep(1)
-# Connect to Pinecone index
-index = pc.Index(index_name)
-# ✅ Load CLIP Model
 @st.cache_resource
-def load_clip():
     model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
     processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
     return model, processor
-model, processor = load_clip()
-# ✅ Streamlit UI
-st.title("🔍 Image & Text Search with CLIP & Pinecone")
-# 📌 **Option 1: Upload Image for Search**
-st.subheader("📤 Upload an Image to Search")
-uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
-if uploaded_file:
-    # Convert file to Image
-    image = Image.open(uploaded_file).convert("RGB")
-    st.image(image, caption="Uploaded Image", use_column_width=True)
-    # Process image with CLIP
-    inputs = processor(images=image, return_tensors="pt")
-    with torch.no_grad():
-        image_features = model.get_image_features(**inputs)
-    # Convert to NumPy & flatten
-    embeddings = image_features.detach().cpu().numpy().flatten().tolist()
-    # ✅ Fix: Proper Query Format
-    st.subheader("🔎 Find Similar Images")
-    if st.button("Search Similar Images"):
-        search_results = index.query(vector=embeddings, top_k=5, include_metadata=True)
-        if search_results and search_results["matches"]:
-            for match in search_results["matches"]:
-                match_id = match.get("id", "Unknown ID")
-                match_score = match.get("score", 0)
-                metadata = match.get("metadata", {})
-                image_url = metadata.get("url", None)
-                if image_url:
-                    st.write(f"🔹 **Match Score:** {match_score}")
-                    st.image(image_url, caption=f"Similar Image - {match_id}")
-                else:
-                    st.write(f"🔹 Match ID: {match_id} (No Image URL Available)")
-        else:
-            st.warning("No similar images found.")
-# 📌 **Option 2: Text Search**
-st.subheader("🔍 Search Images with Text")
-text_query = st.text_input("Enter a description (e.g., 'a cute cat' or 'a red car')")
-if text_query and st.button("Search with Text"):
-    # Convert text to CLIP embedding
-    inputs = processor(text=text_query, return_tensors="pt")
-    with torch.no_grad():
-        text_features = model.get_text_features(**inputs)
-    # Convert to NumPy & flatten
-    text_embeddings = text_features.detach().cpu().numpy().flatten().tolist()
-    # ✅ Fix: Proper Query Format for Text Search
-    search_results = index.query(vector=text_embeddings, top_k=5, include_metadata=True)
-    # ✅ Display results
-    if search_results and search_results["matches"]:
-        for match in search_results["matches"]:
-            match_id = match.get("id", "Unknown ID")
-            match_score = match.get("score", 0)
-            metadata = match.get("metadata", {})
-            image_url = metadata.get("url", None)
-            if image_url:
-                st.write(f"🔹 **Match Score:** {match_score}")
-                st.image(image_url, caption=f"Matched Image - {match_id}")
-            else:
-                st.write(f"🔹 Match ID: {match_id} (No Image URL Available)")
     else:
-        st.warning("No matching images found.")

 import streamlit as st
+from pinecone import Pinecone
+from dotenv import load_dotenv
+import os
 from PIL import Image
+import requests
 from transformers import AutoProcessor, CLIPModel
+import numpy as np
+# Load environment variables
+load_dotenv()
+# Initialize Pinecone
+pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
+index_name = "image-index-50000"
+unsplash_index = pc.Index(index_name)
+# Load CLIP model and processor
 @st.cache_resource
+def load_clip_model():
+    """Load the pretrained "openai/clip-vit-base-patch32" CLIP model and processor.
+
+    Decorated with ``st.cache_resource``. Returns a ``(model, processor)`` tuple.
+    """
     model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
     processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
     return model, processor
+model, processor = load_clip_model()
+# Function to generate embedding from text
+def get_text_embedding(text):
+    inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
+    text_features = model.get_text_features(**inputs)
+    embedding = text_features.detach().cpu().numpy().flatten().tolist()
+    return embedding
+# Function to query Pinecone and fetch similar images
+def search_similar_images(embedding, top_k=10):
+    results = unsplash_index.query(
+        vector=embedding,
+        top_k=top_k,
+        include_metadata=True,
+        namespace="image-search-dataset"
+    )
+    return results["matches"]
+# Streamlit UI
+st.title("🔍Text-to-Image Lookup")
+st.write("Enter a description to find similar images!")
+# Text input widget
+search_query = st.text_input("Enter your search query (e.g.Flower)")
+# Search button
+if st.button("Search"):
+    if search_query:
+        # Generate embedding from text
+        with st.spinner("Generating embedding..."):
+            embedding = get_text_embedding(search_query)
+        # Search for similar images
+        with st.spinner("Searching for similar images..."):
+            matches = search_similar_images(embedding, top_k=10)
+        # Display results
+        st.subheader("Top Similar Images")
+        for match in matches:
+            score = match["score"]
+            photo_id = match["id"]
+            url = match["metadata"]["url"]
+            st.write(f"**Photo ID**: {photo_id} | **Similarity Score**: {score:.4f}")
+            try:
+                # Fetch and display the image from the URL
+                response = requests.get(url, stream=True)
+                response.raw.decode_content = True  # Handle content-encoding
+                img = Image.open(response.raw)
+                st.image(img, caption=f"Photo ID: {photo_id}", use_container_width=True)
+            except Exception as e:
+                st.error(f"Could not load image from {url}: {e}")
     else:
+        st.warning("Please enter a search query!")
+# Instructions
+st.write("---")
+st.write("Note: This app searches an Unsplash dataset indexed in Pinecone using CLIP embeddings based on your text description.")