Spaces:

velmurugan1122
/

Clip_Image_Search

Sleeping

App Files Community

velmurugan1122 committed on Mar 3, 2025

Commit

39bee95

1 Parent(s): 862e786

fix the changes

Browse files

Files changed (8) hide show

src/app.py +55 -62
src/data/__pycache__/dataset.cpython-313.pyc +0 -0
src/data/__pycache__/request_method.cpython-313.pyc +0 -0
src/database/__init__.py +0 -0
src/database/__pycache__/__init__.cpython-313.pyc +0 -0
src/database/__pycache__/create_pinecone_index.cpython-313.pyc +0 -0
src/database/create_pinecone_index.py +3 -3
src/model/clip_model.py +21 -21

src/app.py CHANGED Viewed

@@ -1,91 +1,84 @@
 import os
 import time
-import logging
 import streamlit as st
-import requests
-import torch
 from dotenv import load_dotenv
 from pinecone import Pinecone, ServerlessSpec
-from transformers import AutoTokenizer, CLIPModel, AutoProcessor
 from PIL import Image
-# Logging setup
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-logger = logging.getLogger(__name__)
-# Load environment variables
-load_dotenv()
-PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
-# HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
-# # Ensure Hugging Face authentication
-# from huggingface_hub import login
-# login(HF_ACCESS_TOKEN)
-# Load CLIP model and processor
-tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
 # Connect to Pinecone
-pc = Pinecone(api_key=PINECONE_API_KEY)
-# Ensure the index exists
 index_name = "index-search"
 if not pc.has_index(index_name):
     pc.create_index(name=index_name, metric="cosine",
                     dimension=512,
                     spec=ServerlessSpec(cloud="aws", region="us-east-1"))
-    time.sleep(5)  # Wait for index to initialize
-unsplash_index = pc.Index(index_name)
 # Streamlit UI
-st.title("Search Images by Text or Image")
-search_mode = st.radio("Choose search mode:", ["Text Search", "Image Search"])
-if search_mode == "Text Search":
-    search_query = st.text_input("Search (at least 3 characters)")
-    if len(search_query) >= 3:
-        with st.spinner("Searching images..."):
-            inputs = tokenizer([search_query], padding=True, return_tensors="pt")
-            text_features = model.get_text_features(**inputs)
-            text_embedding = text_features.detach().numpy().flatten().tolist()
-            response = unsplash_index.query(
-                top_k=10,
-                vector=text_embedding,
-                namespace="image-search-dataset",
-                include_metadata=True
-            )
-        # Display results
         cols = st.columns(2)
         for i, result in enumerate(response.matches):
             with cols[i % 2]:
-                st.image(result.metadata["url"], caption=f"Score: {result.score:.4f}")
-elif search_mode == "Image Search":
-    uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
-    if uploaded_file:
-        image = Image.open(uploaded_file).convert("RGB")
-        st.image(image, caption="Uploaded Image", use_column_width=True)
-        with st.spinner("Searching similar images..."):
-            inputs = processor(images=image, return_tensors="pt")
-            image_features = model.get_image_features(**inputs)
-            image_embedding = image_features.detach().numpy().flatten().tolist()
-            response = unsplash_index.query(
-                top_k=10,
-                vector=image_embedding,
-                namespace="image-search-dataset",
-                include_metadata=True
-            )
-        # Display results
         cols = st.columns(2)
         for i, result in enumerate(response.matches):
             with cols[i % 2]:
-                st.image(result.metadata["url"], caption=f"Score: {result.score:.4f}")

+import json
 import os
 import time
 import streamlit as st
 from dotenv import load_dotenv
 from pinecone import Pinecone, ServerlessSpec
+from transformers import AutoProcessor, CLIPModel
 from PIL import Image
+import torch
+global processor, model
 processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+load_dotenv()
 # Connect to Pinecone
+pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
+# Create an index if it does not exist
 index_name = "index-search"
+unsplash_index = None
 if not pc.has_index(index_name):
     pc.create_index(name=index_name, metric="cosine",
                     dimension=512,
                     spec=ServerlessSpec(cloud="aws", region="us-east-1"))
+    # Wait for the index to be ready
+    while True:
+        index = pc.describe_index(index_name)
+        if index.status.get("ready", False):
+            unsplash_index = pc.Index(index_name)
+            break
+        print("Waiting for index to be ready...")
+        time.sleep(1)
+else:
+    unsplash_index = pc.Index(index_name)
 # Streamlit UI
+st.title("🔍 CLIP-Powered Image Search")
+st.markdown("Search images using **text** or **image**!")
+# Search type selection
+search_type = st.radio("Select Search Type", ["Text Search", "Image Search"], horizontal=True)
+def get_text_embedding(query):
+    inputs = processor(text=query, return_tensors="pt")
+    text_features = model.get_text_features(**inputs)
+    return text_features.detach().numpy().flatten().tolist()
+def get_image_embedding(image):
+    image = image.convert("RGB").resize((224, 224))
+    inputs = processor(images=image, return_tensors="pt")
+    image_features = model.get_image_features(**inputs)
+    return image_features.detach().numpy().flatten().tolist()
+if search_type == "Text Search":
+    search_query = st.text_input("Enter a search query (min 3 characters)")
+    if len(search_query) >= 3:
+        with st.spinner("Searching images..."):
+            text_embedding = get_text_embedding(search_query)
+            response = unsplash_index.query(top_k=10, vector=text_embedding, namespace="image-search-dataset", include_metadata=True)
+        # Display images in two columns
         cols = st.columns(2)
         for i, result in enumerate(response.matches):
             with cols[i % 2]:
+                st.image(result.metadata["url"], caption=f"Match {i+1}")
+elif search_type == "Image Search":
+    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file)
+        st.image(image, caption="Uploaded Image")
+        with st.spinner("Searching for similar images..."):
+            image_embedding = get_image_embedding(image)
+            response = unsplash_index.query(top_k=10, vector=image_embedding, namespace="image-search-dataset", include_metadata=True)
+        # Display images in two columns
         cols = st.columns(2)
         for i, result in enumerate(response.matches):
             with cols[i % 2]:
+                st.image(result.metadata["url"], caption=f"Match {i+1}")

src/data/__pycache__/dataset.cpython-313.pyc CHANGED Viewed

Binary files a/src/data/__pycache__/dataset.cpython-313.pyc and b/src/data/__pycache__/dataset.cpython-313.pyc differ

src/data/__pycache__/request_method.cpython-313.pyc CHANGED Viewed

Binary files a/src/data/__pycache__/request_method.cpython-313.pyc and b/src/data/__pycache__/request_method.cpython-313.pyc differ

src/database/__init__.py ADDED Viewed

File without changes

src/database/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (159 Bytes). View file

src/database/__pycache__/create_pinecone_index.cpython-313.pyc ADDED Viewed

Binary file (3.11 kB). View file

src/database/create_pinecone_index.py CHANGED Viewed

@@ -8,10 +8,10 @@ import time
 from transformers import AutoProcessor, CLIPModel
 from data import dataset,request_method
-os.environ.pop("HF_TOKEN", None)
-os.environ.pop("HUGGING_FACE_HUB_TOKEN", None)
-load_dotenv()
 def get_index():
     pincone_api_key = os.environ.get("PINECONE_API_KEY")

 from transformers import AutoProcessor, CLIPModel
 from data import dataset,request_method
+# os.environ.pop("HF_TOKEN", None)
+# os.environ.pop("HUGGING_FACE_HUB_TOKEN", None)
+# load_dotenv()
 def get_index():
     pincone_api_key = os.environ.get("PINECONE_API_KEY")

src/model/clip_model.py CHANGED Viewed

@@ -1,27 +1,21 @@
-# Add src directory to path
-src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
-sys.path.append(src_directory)
 import os
 import sys
 import logging
 from transformers import AutoProcessor, CLIPModel
 from database import create_pinecone_index
 from data import request_method
 from dotenv import load_dotenv
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-# Set Hugging Face token
 load_dotenv()
-HF_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
 # Load CLIP model and processor
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
@@ -47,29 +41,35 @@ def get_image_embedding(image_data):
         if not photo_id or not url:
             raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")
         image = request_method.get_urlimage(image_data)
         inputs = processor(images=image, return_tensors="pt")
-        image_features = model.get_image_features(**inputs)
-        embeddings = image_features.detach().cpu().numpy().flatten().tolist()
         pinecone_index = create_pinecone_index.get_index()
         pinecone_index.upsert(
             vectors=[
                 {
-                    "id": photo_id,
                     "values": embeddings,
                     "metadata": {
                         "url": url,
-                        "photo_id": photo_id
                     }
                 },
             ],
             namespace="image-search-dataset"
         )
-        logger.info(f"Successfully indexed image {photo_id}")
         return f"Successfully indexed image {photo_id}"
     except Exception as e:
-        logger.error(f"Error processing image {image_data}: {e}")
-        return f"Error processing image {photo_id}: {e}"

 import os
 import sys
+src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
+sys.path.append(src_directory)
 import logging
 from transformers import AutoProcessor, CLIPModel
 from database import create_pinecone_index
 from data import request_method
 from dotenv import load_dotenv
+import torch
+# Add src directory to path
+# Load environment variables
 load_dotenv()
+# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
 # Load CLIP model and processor
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
         if not photo_id or not url:
             raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")
+        # Retrieve the image from the URL
         image = request_method.get_urlimage(image_data)
+        if image is None:
+            raise ValueError(f"Failed to retrieve image from URL: {url}")
+        # Process image and generate embeddings
         inputs = processor(images=image, return_tensors="pt")
+        with torch.no_grad():
+            image_features = model.get_image_features(**inputs)
+        embeddings = image_features.cpu().numpy().flatten().tolist()
+        # Index the embeddings in Pinecone
         pinecone_index = create_pinecone_index.get_index()
         pinecone_index.upsert(
             vectors=[
                 {
+                    "id": str(photo_id),
                     "values": embeddings,
                     "metadata": {
                         "url": url,
+                        "photo_id": str(photo_id)
                     }
                 },
             ],
             namespace="image-search-dataset"
         )
         return f"Successfully indexed image {photo_id}"
     except Exception as e:
+        logging.error(f"Error processing image {image_data.get('photo_id', 'Unknown')}: {e}")
+        return f"Error processing image {image_data.get('photo_id', 'Unknown')}: {e}"