velmurugan1122 committed on
Commit
39bee95
·
1 Parent(s): 862e786

fix the changes

Browse files
src/app.py CHANGED
@@ -1,91 +1,84 @@
 
1
  import os
2
  import time
3
- import logging
4
  import streamlit as st
5
- import requests
6
- import torch
7
  from dotenv import load_dotenv
8
  from pinecone import Pinecone, ServerlessSpec
9
- from transformers import AutoTokenizer, CLIPModel, AutoProcessor
10
  from PIL import Image
 
11
 
12
- # Logging setup
13
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
14
- logger = logging.getLogger(__name__)
15
-
16
- # Load environment variables
17
- load_dotenv()
18
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
19
- # HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
20
-
21
- # # Ensure Hugging Face authentication
22
- # from huggingface_hub import login
23
- # login(HF_ACCESS_TOKEN)
24
 
25
- # Load CLIP model and processor
26
- tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
27
- model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
28
  processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
 
 
29
 
30
  # Connect to Pinecone
31
- pc = Pinecone(api_key=PINECONE_API_KEY)
32
 
33
- # Ensure the index exists
34
  index_name = "index-search"
 
 
35
  if not pc.has_index(index_name):
36
  pc.create_index(name=index_name, metric="cosine",
37
  dimension=512,
38
  spec=ServerlessSpec(cloud="aws", region="us-east-1"))
39
- time.sleep(5) # Wait for index to initialize
40
-
41
- unsplash_index = pc.Index(index_name)
 
 
 
 
 
 
 
42
 
43
  # Streamlit UI
44
- st.title("Search Images by Text or Image")
 
45
 
46
- search_mode = st.radio("Choose search mode:", ["Text Search", "Image Search"])
 
47
 
48
- if search_mode == "Text Search":
49
- search_query = st.text_input("Search (at least 3 characters)")
50
- if len(search_query) >= 3:
51
- with st.spinner("Searching images..."):
52
- inputs = tokenizer([search_query], padding=True, return_tensors="pt")
53
- text_features = model.get_text_features(**inputs)
54
- text_embedding = text_features.detach().numpy().flatten().tolist()
55
 
56
- response = unsplash_index.query(
57
- top_k=10,
58
- vector=text_embedding,
59
- namespace="image-search-dataset",
60
- include_metadata=True
61
- )
62
 
63
- # Display results
 
 
 
 
 
 
 
64
  cols = st.columns(2)
65
  for i, result in enumerate(response.matches):
66
  with cols[i % 2]:
67
- st.image(result.metadata["url"], caption=f"Score: {result.score:.4f}")
68
-
69
- elif search_mode == "Image Search":
70
- uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
71
- if uploaded_file:
72
- image = Image.open(uploaded_file).convert("RGB")
73
- st.image(image, caption="Uploaded Image", use_column_width=True)
74
-
75
- with st.spinner("Searching similar images..."):
76
- inputs = processor(images=image, return_tensors="pt")
77
- image_features = model.get_image_features(**inputs)
78
- image_embedding = image_features.detach().numpy().flatten().tolist()
79
-
80
- response = unsplash_index.query(
81
- top_k=10,
82
- vector=image_embedding,
83
- namespace="image-search-dataset",
84
- include_metadata=True
85
- )
86
-
87
- # Display results
88
  cols = st.columns(2)
89
  for i, result in enumerate(response.matches):
90
  with cols[i % 2]:
91
- st.image(result.metadata["url"], caption=f"Score: {result.score:.4f}")
 
1
+ import json
2
  import os
3
  import time
 
4
  import streamlit as st
 
 
5
  from dotenv import load_dotenv
6
  from pinecone import Pinecone, ServerlessSpec
7
+ from transformers import AutoProcessor, CLIPModel
8
  from PIL import Image
9
+ import torch
10
 
11
+ global processor, model
 
 
 
 
 
 
 
 
 
 
 
12
 
 
 
 
13
  processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
14
+ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
15
+
16
+ load_dotenv()
17
 
18
  # Connect to Pinecone
19
+ pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
20
 
21
+ # Create an index if it does not exist
22
  index_name = "index-search"
23
+ unsplash_index = None
24
+
25
  if not pc.has_index(index_name):
26
  pc.create_index(name=index_name, metric="cosine",
27
  dimension=512,
28
  spec=ServerlessSpec(cloud="aws", region="us-east-1"))
29
+ # Wait for the index to be ready
30
+ while True:
31
+ index = pc.describe_index(index_name)
32
+ if index.status.get("ready", False):
33
+ unsplash_index = pc.Index(index_name)
34
+ break
35
+ print("Waiting for index to be ready...")
36
+ time.sleep(1)
37
+ else:
38
+ unsplash_index = pc.Index(index_name)
39
 
40
  # Streamlit UI
41
+ st.title("🔍 CLIP-Powered Image Search")
42
+ st.markdown("Search images using **text** or **image**!")
43
 
44
+ # Search type selection
45
+ search_type = st.radio("Select Search Type", ["Text Search", "Image Search"], horizontal=True)
46
 
47
+ def get_text_embedding(query):
48
+ inputs = processor(text=query, return_tensors="pt")
49
+ text_features = model.get_text_features(**inputs)
50
+ return text_features.detach().numpy().flatten().tolist()
 
 
 
51
 
52
+ def get_image_embedding(image):
53
+ image = image.convert("RGB").resize((224, 224))
54
+ inputs = processor(images=image, return_tensors="pt")
55
+ image_features = model.get_image_features(**inputs)
56
+ return image_features.detach().numpy().flatten().tolist()
 
57
 
58
+ if search_type == "Text Search":
59
+ search_query = st.text_input("Enter a search query (min 3 characters)")
60
+ if len(search_query) >= 3:
61
+ with st.spinner("Searching images..."):
62
+ text_embedding = get_text_embedding(search_query)
63
+ response = unsplash_index.query(top_k=10, vector=text_embedding, namespace="image-search-dataset", include_metadata=True)
64
+
65
+ # Display images in two columns
66
  cols = st.columns(2)
67
  for i, result in enumerate(response.matches):
68
  with cols[i % 2]:
69
+ st.image(result.metadata["url"], caption=f"Match {i+1}")
70
+
71
+ elif search_type == "Image Search":
72
+ uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
73
+ if uploaded_file is not None:
74
+ image = Image.open(uploaded_file)
75
+ st.image(image, caption="Uploaded Image")
76
+ with st.spinner("Searching for similar images..."):
77
+ image_embedding = get_image_embedding(image)
78
+ response = unsplash_index.query(top_k=10, vector=image_embedding, namespace="image-search-dataset", include_metadata=True)
79
+
80
+ # Display images in two columns
 
 
 
 
 
 
 
 
 
81
  cols = st.columns(2)
82
  for i, result in enumerate(response.matches):
83
  with cols[i % 2]:
84
+ st.image(result.metadata["url"], caption=f"Match {i+1}")
src/data/__pycache__/dataset.cpython-313.pyc CHANGED
Binary files a/src/data/__pycache__/dataset.cpython-313.pyc and b/src/data/__pycache__/dataset.cpython-313.pyc differ
 
src/data/__pycache__/request_method.cpython-313.pyc CHANGED
Binary files a/src/data/__pycache__/request_method.cpython-313.pyc and b/src/data/__pycache__/request_method.cpython-313.pyc differ
 
src/database/__init__.py ADDED
File without changes
src/database/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (159 Bytes). View file
 
src/database/__pycache__/create_pinecone_index.cpython-313.pyc ADDED
Binary file (3.11 kB). View file
 
src/database/create_pinecone_index.py CHANGED
@@ -8,10 +8,10 @@ import time
8
  from transformers import AutoProcessor, CLIPModel
9
  from data import dataset,request_method
10
 
11
- os.environ.pop("HF_TOKEN", None)
12
- os.environ.pop("HUGGING_FACE_HUB_TOKEN", None)
13
 
14
- load_dotenv()
15
 
16
  def get_index():
17
  pincone_api_key = os.environ.get("PINECONE_API_KEY")
 
8
  from transformers import AutoProcessor, CLIPModel
9
  from data import dataset,request_method
10
 
11
+ # os.environ.pop("HF_TOKEN", None)
12
+ # os.environ.pop("HUGGING_FACE_HUB_TOKEN", None)
13
 
14
+ # load_dotenv()
15
 
16
  def get_index():
17
  pincone_api_key = os.environ.get("PINECONE_API_KEY")
src/model/clip_model.py CHANGED
@@ -1,27 +1,21 @@
1
- # Add src directory to path
2
- src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
3
- sys.path.append(src_directory)
4
  import os
5
  import sys
 
 
 
 
6
  import logging
7
  from transformers import AutoProcessor, CLIPModel
8
  from database import create_pinecone_index
9
  from data import request_method
10
  from dotenv import load_dotenv
 
11
 
 
12
 
13
- # Configure logging
14
- logging.basicConfig(
15
- level=logging.INFO,
16
- format="%(asctime)s - %(levelname)s - %(message)s",
17
- )
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
-
22
- # Set Hugging Face token
23
  load_dotenv()
24
- HF_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
25
 
26
  # Load CLIP model and processor
27
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
@@ -47,29 +41,35 @@ def get_image_embedding(image_data):
47
  if not photo_id or not url:
48
  raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")
49
 
 
50
  image = request_method.get_urlimage(image_data)
 
 
 
 
51
  inputs = processor(images=image, return_tensors="pt")
52
- image_features = model.get_image_features(**inputs)
53
- embeddings = image_features.detach().cpu().numpy().flatten().tolist()
 
54
 
 
55
  pinecone_index = create_pinecone_index.get_index()
56
  pinecone_index.upsert(
57
  vectors=[
58
  {
59
- "id": photo_id,
60
  "values": embeddings,
61
  "metadata": {
62
  "url": url,
63
- "photo_id": photo_id
64
  }
65
  },
66
  ],
67
  namespace="image-search-dataset"
68
  )
69
 
70
- logger.info(f"Successfully indexed image {photo_id}")
71
  return f"Successfully indexed image {photo_id}"
72
 
73
  except Exception as e:
74
- logger.error(f"Error processing image {image_data}: {e}")
75
- return f"Error processing image {photo_id}: {e}"
 
 
 
 
1
  import os
2
  import sys
3
+
4
+ src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
5
+ sys.path.append(src_directory)
6
+
7
  import logging
8
  from transformers import AutoProcessor, CLIPModel
9
  from database import create_pinecone_index
10
  from data import request_method
11
  from dotenv import load_dotenv
12
+ import torch
13
 
14
+ # Add src directory to path
15
 
16
+ # Load environment variables
 
 
 
 
 
 
 
 
 
17
  load_dotenv()
18
+ # HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
19
 
20
  # Load CLIP model and processor
21
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 
41
  if not photo_id or not url:
42
  raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")
43
 
44
+ # Retrieve the image from the URL
45
  image = request_method.get_urlimage(image_data)
46
+ if image is None:
47
+ raise ValueError(f"Failed to retrieve image from URL: {url}")
48
+
49
+ # Process image and generate embeddings
50
  inputs = processor(images=image, return_tensors="pt")
51
+ with torch.no_grad():
52
+ image_features = model.get_image_features(**inputs)
53
+ embeddings = image_features.cpu().numpy().flatten().tolist()
54
 
55
+ # Index the embeddings in Pinecone
56
  pinecone_index = create_pinecone_index.get_index()
57
  pinecone_index.upsert(
58
  vectors=[
59
  {
60
+ "id": str(photo_id),
61
  "values": embeddings,
62
  "metadata": {
63
  "url": url,
64
+ "photo_id": str(photo_id)
65
  }
66
  },
67
  ],
68
  namespace="image-search-dataset"
69
  )
70
 
 
71
  return f"Successfully indexed image {photo_id}"
72
 
73
  except Exception as e:
74
+ logging.error(f"Error processing image {image_data.get('photo_id', 'Unknown')}: {e}")
75
+ return f"Error processing image {image_data.get('photo_id', 'Unknown')}: {e}"