Spaces:
Sleeping
Sleeping
Enhanced the Streamlit UI and uploaded the app to Hugging Face
Browse files- .gitattributes +35 -0
- README.md +0 -2
- src/app/__pycache__/homepage.cpython-313.pyc +0 -0
- src/app/app.py +3 -13
- src/app/homepage.py +131 -15
- src/app/pages/__pycache__/load_data_page.cpython-313.pyc +0 -0
- src/app/pages/load_data_page.py +5 -0
- src/database_pinecone/__pycache__/create_database.cpython-313.pyc +0 -0
- src/database_pinecone/__pycache__/querry_database.cpython-313.pyc +0 -0
- src/database_pinecone/create_database.py +33 -28
- src/model/__pycache__/clip_model.cpython-313.pyc +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
# lookalike-image-finder
|
| 2 |
-
Look-A-Like Image Finder lets you search for similar images by providing either a text description or an image.
|
|
|
|
|
|
|
|
|
src/app/__pycache__/homepage.cpython-313.pyc
CHANGED
|
Binary files a/src/app/__pycache__/homepage.cpython-313.pyc and b/src/app/__pycache__/homepage.cpython-313.pyc differ
|
|
|
src/app/app.py
CHANGED
|
@@ -1,16 +1,6 @@
|
|
| 1 |
import homepage
|
| 2 |
-
|
| 3 |
-
search_option = ['Select an option','Search by text', 'Search by image']
|
| 4 |
|
| 5 |
homepage.setup_page()
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
if choosen_option.lower() == 'search by text':
|
| 9 |
-
user_query = homepage.get_search_text_input()
|
| 10 |
-
if user_query:
|
| 11 |
-
homepage.get_images_by_text(user_query)
|
| 12 |
-
elif choosen_option.lower() == 'search by image':
|
| 13 |
-
image_input = homepage.get_search_image_input()
|
| 14 |
-
if image_input:
|
| 15 |
-
homepage.get_images_by_image(image_input)
|
| 16 |
-
|
|
|
|
| 1 |
import homepage
|
| 2 |
+
import streamlit as st
|
|
|
|
| 3 |
|
| 4 |
# Configure the page (title, layout, header banner).
homepage.setup_page()
# Render the text / image search inputs and their results.
homepage.search_tab()
# Link to the data-loading page by script path rather than a hard-coded
# "http://localhost:8501/..." URL: the localhost address only resolves on a
# developer machine and is dead once the app runs on a remote host.
st.page_link("pages/load_data_page.py", label="Navigate to load data page")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/app/homepage.py
CHANGED
|
@@ -4,32 +4,88 @@ src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..",
|
|
| 4 |
sys.path.append(src_directory)
|
| 5 |
import streamlit as st
|
| 6 |
from utils import logger
|
| 7 |
-
from database_pinecone import querry_database
|
| 8 |
from model.clip_model import ClipModel
|
|
|
|
| 9 |
|
| 10 |
clip_model = ClipModel()
|
| 11 |
logger = logger.get_logger()
|
| 12 |
|
| 13 |
-
PAGE_TITLE = "Look
|
| 14 |
-
PAGE_LAYOUT = "
|
| 15 |
SIDEBAR_TITLE = "Find Similar Images"
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
def setup_page():
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def get_user_selection(options):
|
| 28 |
selected_option = st.sidebar.selectbox("Select the option", options)
|
| 29 |
return selected_option
|
| 30 |
|
| 31 |
def get_search_image_input():
|
| 32 |
-
uploaded_image = st.
|
| 33 |
return uploaded_image
|
| 34 |
|
| 35 |
def get_search_text_input():
|
|
@@ -37,11 +93,13 @@ def get_search_text_input():
|
|
| 37 |
return user_search
|
| 38 |
|
| 39 |
def display_images(response):
|
|
|
|
| 40 |
if response:
|
| 41 |
cols = st.columns(2)
|
| 42 |
for i, result in enumerate(response.matches):
|
| 43 |
with cols[i % 2]:
|
| 44 |
-
st.image(result.metadata["url"])
|
|
|
|
| 45 |
|
| 46 |
def write_message(message):
|
| 47 |
st.write(message)
|
|
@@ -49,13 +107,71 @@ def write_message(message):
|
|
| 49 |
def get_images_by_text(query):
|
| 50 |
embedding = clip_model.get_text_embedding(query)
|
| 51 |
response = querry_database.fetch_data(embedding)
|
| 52 |
-
message = f"Showing search results for {query}"
|
| 53 |
write_message(message)
|
| 54 |
images = display_images(response)
|
| 55 |
|
| 56 |
def get_images_by_image(query):
|
| 57 |
embedding = clip_model.get_uploaded_image_embedding(query)
|
| 58 |
response = querry_database.fetch_data(embedding)
|
| 59 |
-
message = f"Showing search results of relevant images"
|
| 60 |
write_message(message)
|
| 61 |
images = display_images(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
sys.path.append(src_directory)
|
| 5 |
import streamlit as st
|
| 6 |
from utils import logger
|
| 7 |
+
from database_pinecone import querry_database,create_database
|
| 8 |
from model.clip_model import ClipModel
|
| 9 |
+
from data import data_set
|
| 10 |
|
| 11 |
clip_model = ClipModel()
|
| 12 |
logger = logger.get_logger()
|
| 13 |
|
| 14 |
+
PAGE_TITLE = "Look-a-Like: Image Finder"
|
| 15 |
+
PAGE_LAYOUT = "wide"
|
| 16 |
SIDEBAR_TITLE = "Find Similar Images"
|
| 17 |
+
PHOTO_ID_KEY = "photo_id"
|
| 18 |
+
IMAGE_URL_KEY = "photo_image_url"
|
| 19 |
+
PINECONE_INDEX = create_database.get_index()
|
| 20 |
|
| 21 |
def setup_page():
    """Configure the Streamlit page and render the centered title banner.

    Applies ``PAGE_TITLE`` / ``PAGE_LAYOUT`` through ``st.set_page_config``,
    draws the title as styled HTML, and logs that the page was configured.
    The welcome toast is shown only once per session: this function runs on
    every script rerun, so an unconditional toast would pop up again after
    each widget interaction.
    """
    st.set_page_config(page_title=PAGE_TITLE, layout=PAGE_LAYOUT)

    # Built from adjacent literals so the HTML carries no source indentation.
    title_html = (
        "<h1 style='color:darkblue; text-align:center; font-size:32px; margin-top:-10px;'>"
        f"<i>{PAGE_TITLE} 🔍📸</i>"
        "</h1>"
    )
    st.markdown(title_html, unsafe_allow_html=True)

    if "welcome_toast_shown" not in st.session_state:
        st.toast("✨ Welcome to Look-a-Like: The Ultimate Image Finder! Start searching now. 🔍")
        st.session_state.welcome_toast_shown = True

    logger.info(f"Page successfully configured with title: {PAGE_TITLE}")
|
| 30 |
|
| 31 |
+
def search_tab():
    """Render the search inputs and show results for the most recent one.

    Two inputs are offered side by side: a free-text query and an image
    upload. Whichever input the user changed last drives the search; if that
    input is later cleared, the other one takes over when it still holds a
    value.

    Session-state keys written here:
        search_query: the active text query, or "" when text is not active.
        uploaded_image: the active uploaded file, or None when not active.
        last_text_input / last_upload_signature: the widget values seen on
            the previous run, used to detect which input actually changed.
        active_search: "text", "image" or None.

    The previous-run values are remembered instead of being cleared after
    each render. Clearing them made every run look like a fresh change on
    both inputs, so a file left in the uploader always overrode a newly typed
    query.
    """
    st.markdown("<hr>", unsafe_allow_html=True)  # Horizontal rule below the title

    session_defaults = {
        "search_query": "",
        "uploaded_image": None,
        "last_text_input": "",
        "last_upload_signature": None,
        "active_search": None,
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    with st.container():
        col1, col2 = st.columns([7, 4], gap="small")
        with col1:
            search_query = st.text_input(
                label="🔍 Search for Images",
                placeholder="Type keywords (e.g., 'sunset beach', 'city skyline')",
            )
        with col2:
            uploaded_image = st.file_uploader(
                label="📤 Upload an Image",
                type=["png", "jpg", "jpeg"],
                help="Upload an image to find visually similar results.",
            )

    typed_query = search_query.strip()
    if uploaded_image is None:
        upload_signature = None
    else:
        # Compare a lightweight signature rather than the file objects.
        upload_signature = (
            getattr(uploaded_image, "file_id", None),
            uploaded_image.name,
            uploaded_image.size,
        )

    text_changed = typed_query != st.session_state.last_text_input
    upload_changed = upload_signature != st.session_state.last_upload_signature
    st.session_state.last_text_input = typed_query
    st.session_state.last_upload_signature = upload_signature

    active = st.session_state.active_search
    # A new upload wins over a simultaneous text change, matching the order
    # in which the two inputs were previously evaluated.
    if upload_changed and uploaded_image is not None:
        active = "image"
    elif text_changed and typed_query:
        active = "text"

    # If the active input was cleared, fall back to the other one.
    if active == "text" and not typed_query:
        active = "image" if uploaded_image is not None else None
    elif active == "image" and uploaded_image is None:
        active = "text" if typed_query else None

    st.session_state.active_search = active
    st.session_state.search_query = typed_query if active == "text" else ""
    st.session_state.uploaded_image = uploaded_image if active == "image" else None

    with st.container():
        if active == "text":
            get_images_by_text(typed_query)
        elif active == "image":
            st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)
            get_images_by_image(uploaded_image)
|
| 82 |
+
|
| 83 |
def get_user_selection(options):
    """Show a sidebar select box over ``options`` and return the chosen entry."""
    return st.sidebar.selectbox("Select the option", options)
|
| 86 |
|
| 87 |
def get_search_image_input():
    """Render an image uploader (png / jpeg) and return its current value."""
    accepted_types = ['png', 'jpeg']
    return st.file_uploader(
        "Upload the image to get similar images",
        type=accepted_types,
    )
|
| 90 |
|
| 91 |
def get_search_text_input():
|
|
|
|
| 93 |
return user_search
|
| 94 |
|
| 95 |
def display_images(response):
    """Render the images referenced by a query response in a two-column grid.

    Args:
        response: Query result exposing a ``matches`` sequence; each match is
            expected to carry a ``metadata`` mapping with a ``"url"`` entry.

    Matches without a URL are skipped (and logged) instead of aborting the
    whole render. When there is nothing to show, the user gets an
    informational message rather than an empty page.
    """
    logger.info("Loading the images to display")

    matches = getattr(response, "matches", None) if response else None
    if not matches:
        st.info("No matching images found.")
        logger.info("No images to display")
        return

    cols = st.columns(2)
    shown = 0
    for result in matches:
        metadata = getattr(result, "metadata", None) or {}
        url = metadata.get("url")
        if not url:
            logger.warning("Skipping a match without a 'url' metadata entry")
            continue
        # Alternate columns by the number of images actually rendered so a
        # skipped match does not leave a gap in the grid.
        with cols[shown % 2]:
            st.image(url, width=500)
        shown += 1

    logger.info("Displayed the images successfully")
|
| 103 |
|
| 104 |
def write_message(message):
    """Write ``message`` to the page through ``st.write``."""
    st.write(message)
|
|
|
|
| 107 |
def get_images_by_text(query):
    """Search by text: embed ``query``, fetch matches and render them.

    Args:
        query: The text the user searched for; it is embedded with the CLIP
            model and echoed in the heading above the results.
    """
    embedding = clip_model.get_text_embedding(query)
    response = querry_database.fetch_data(embedding)
    write_message(f"🔍 Showing search results for {query}")
    # display_images renders as a side effect and returns nothing, so its
    # result is not bound to a name.
    display_images(response)
|
| 113 |
|
| 114 |
def get_images_by_image(query):
    """Search by image: embed the uploaded image, fetch matches and render them.

    Args:
        query: The uploaded image object handed to
            ``clip_model.get_uploaded_image_embedding``.
    """
    embedding = clip_model.get_uploaded_image_embedding(query)
    response = querry_database.fetch_data(embedding)
    # Plain literal: the message has no placeholders, so no f-string is needed.
    write_message("🔍 Showing search results of relevant images")
    # display_images renders as a side effect and returns nothing.
    display_images(response)
|
| 120 |
+
|
| 121 |
+
def load_data():
    """Sidebar workflow that upserts a range of dataset rows into the index.

    The user picks a start and end index in the sidebar and clicks the upsert
    button. The rows returned by ``data_set.get_df(start_index, end_index)``
    are then passed one by one to ``create_database.process_and_upsert_data``
    while a progress bar and a status message are updated. When the loop
    finishes, the ``load_clicked`` flag is reset and the script is rerun.

    ``st.session_state.load_clicked`` disables the button while a load is in
    progress; every exit path below resets it so the button cannot stay
    disabled for the rest of the session.
    """
    st.sidebar.header("📊 Data Loading Parameters")
    start_index = st.sidebar.number_input("Select start index", min_value=0, value=0)
    end_index = st.sidebar.number_input("Select end index", min_value=0, value=100)

    if start_index > end_index:
        st.sidebar.error("⚠️ Start index must be earlier than the end index.")
        return

    if "load_clicked" not in st.session_state:
        st.session_state.load_clicked = False

    try:
        st.sidebar.info(f"Click the button to load data from index **{start_index} to {end_index}**.")
        if st.sidebar.button("🚀 Upsert Data", disabled=st.session_state.load_clicked, help="Click to insert data into the database"):
            st.session_state.load_clicked = True

            with st.spinner("⏳ Upserting data... Please wait"):
                df = data_set.get_df(start_index, end_index)
                if df.empty:
                    st.warning("⚠️ No data found in the selected range.")
                    # Reset before leaving; otherwise the button would render
                    # disabled on every later run.
                    st.session_state.load_clicked = False
                    return

                success_message = st.empty()
                progress_bar = st.progress(0)
                total_rows = len(df)
                for processed, (i, data) in enumerate(df.iterrows(), start=1):
                    create_database.process_and_upsert_data(PINECONE_INDEX, data, IMAGE_URL_KEY, PHOTO_ID_KEY)
                    # Read the id through the shared key constant so this
                    # message stays in sync with what was upserted.
                    photo_id = data.get(PHOTO_ID_KEY, 'unknown')
                    success_message.success(f"Row {i + 1} (ID: {photo_id}) added successfully!")
                    logger.info(f"Row {i + 1} (ID: {photo_id}) upserted successfully.")
                    progress_bar.progress(int(processed / total_rows * 100))

                progress_bar.empty()
                success_message.success("All data loaded and added to the database successfully!")
                st.session_state.load_clicked = False
                st.rerun()

    except Exception as e:
        st.error(f"Error loading data: {e}")
        logger.error(f"Error loading data: {e}")
        st.session_state.load_clicked = False
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def about_us():
    """Render an "About us" button; when clicked, write the app description."""
    if not st.button("About us"):
        return

    about_text = """
This app allows you to search for images in two powerful ways:

1. **Text-based Query**: You can simply type a description or keyword, and we will fetch the most relevant images from our database.

2. **Image-based Query**: Alternatively, you can upload an image, and we'll search for similar images based on your input image.

Whether you're looking for images based on a specific text query or searching using an image, our app makes it easy to find exactly what you're looking for. Simply enter your query and get results instantly!

Explore and discover the images you need. Enjoy the search experience! 😊
"""
    st.write(about_text)
|
src/app/pages/__pycache__/load_data_page.cpython-313.pyc
ADDED
|
Binary file (3.42 kB). View file
|
|
|
src/app/pages/load_data_page.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import homepage
|
| 2 |
+
|
| 3 |
+
homepage.setup_page()
|
| 4 |
+
homepage.load_data()
|
| 5 |
+
|
src/database_pinecone/__pycache__/create_database.cpython-313.pyc
CHANGED
|
Binary files a/src/database_pinecone/__pycache__/create_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/create_database.cpython-313.pyc differ
|
|
|
src/database_pinecone/__pycache__/querry_database.cpython-313.pyc
CHANGED
|
Binary files a/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc and b/src/database_pinecone/__pycache__/querry_database.cpython-313.pyc differ
|
|
|
src/database_pinecone/create_database.py
CHANGED
|
@@ -5,10 +5,9 @@ sys.path.append(src_directory)
|
|
| 5 |
from pinecone import Pinecone, ServerlessSpec
|
| 6 |
import time
|
| 7 |
from model.clip_model import ClipModel
|
| 8 |
-
from data import request_images
|
| 9 |
-
from data import data_set
|
| 10 |
from config import config
|
| 11 |
from utils import logger
|
|
|
|
| 12 |
|
| 13 |
config = config.load_config()
|
| 14 |
logger = logger.get_logger()
|
|
@@ -57,38 +56,44 @@ def get_index():
|
|
| 57 |
logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
|
| 58 |
return index
|
| 59 |
|
| 60 |
-
def
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
index.upsert(
|
| 64 |
vectors=[{
|
| 65 |
-
"id":
|
| 66 |
"values": embeddings,
|
| 67 |
"metadata": {
|
| 68 |
-
|
| 69 |
-
|
| 70 |
}
|
| 71 |
}],
|
| 72 |
namespace="image-search-dataset",
|
| 73 |
)
|
| 74 |
-
logger.info(f"Successfully upserted
|
|
|
|
|
|
|
| 75 |
except Exception as e:
|
| 76 |
-
logger.
|
| 77 |
-
raise
|
| 78 |
-
|
| 79 |
-
def add_data_to_database(df):
|
| 80 |
-
try:
|
| 81 |
-
index = get_index()
|
| 82 |
-
logger.info("Starting to add the embeddings to the database")
|
| 83 |
-
for _, data in df.iterrows():
|
| 84 |
-
url = data['photo_image_url']
|
| 85 |
-
id = data['photo_id']
|
| 86 |
-
embeddings = clip_model.get_image_embedding(url)
|
| 87 |
-
upsert_data(index,embeddings,id,url)
|
| 88 |
-
logger.info("Added embeddings to the database successfully")
|
| 89 |
-
except Exception as e:
|
| 90 |
-
logger.info("Unable to add the data. Error : {e}")
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
# df = data_set.get_df(8000,8500)
|
| 94 |
-
# add_data_to_database(df)
|
|
|
|
| 5 |
from pinecone import Pinecone, ServerlessSpec
|
| 6 |
import time
|
| 7 |
from model.clip_model import ClipModel
|
|
|
|
|
|
|
| 8 |
from config import config
|
| 9 |
from utils import logger
|
| 10 |
+
import pandas as pd
|
| 11 |
|
| 12 |
config = config.load_config()
|
| 13 |
logger = logger.get_logger()
|
|
|
|
| 56 |
logger.info(f"Error occurred while getting or creating the Pinecone index: {str(e)}", exc_info=True)
|
| 57 |
return index
|
| 58 |
|
| 59 |
+
def process_and_upsert_data(index, data: pd.Series, url_key: str, id_key: str):
    """Embed the image referenced by one data row and upsert it into the index.

    The URL and photo id are read from ``data`` using ``url_key`` and
    ``id_key``, an embedding for the image at that URL is produced with
    ``clip_model.get_image_embedding``, and a single vector is upserted into
    the ``image-search-dataset`` namespace with the URL and photo id stored
    as metadata.

    Failures while embedding or upserting are logged with a traceback and
    reported through the return value instead of being raised, so a caller
    iterating over many rows keeps going and can still tell which rows
    failed.

    Args:
        index: Index handle; only its ``upsert`` method is used here.
        data (pandas.Series): A single row containing the URL and the id.
        url_key (str): Label in ``data`` holding the image URL.
        id_key (str): Label in ``data`` holding the photo id.

    Returns:
        bool: True when the vector was upserted, False when embedding or
        upserting failed.

    Raises:
        ValueError: If ``url_key`` or ``id_key`` is missing from ``data``.
    """
    # Validate up front, before any work is attempted, so a malformed row is
    # reported to the caller rather than logged and forgotten.
    if url_key not in data or id_key not in data:
        raise ValueError(f"Missing required keys: '{url_key}' or '{id_key}' in the data")

    try:
        logger.info("Started to process and upsert the data")
        url = data[url_key]
        photo_id = data[id_key]
        embeddings = clip_model.get_image_embedding(url)
        index.upsert(
            vectors=[{
                # Vector ids are sent as strings; str() is a no-op for ids
                # that already are strings.
                "id": str(photo_id),
                "values": embeddings,
                "metadata": {
                    "url": url,
                    "photo_id": photo_id,
                },
            }],
            namespace="image-search-dataset",
        )
        logger.info(f"Successfully upserted data for photo_id {photo_id} with URL {url}")
        return True
    except Exception as e:
        # exc_info=True keeps the traceback in the log, matching how
        # get_index reports its failures.
        logger.error(
            f"Error processing row with photo_id {data.get(id_key, 'unknown')}: {e}",
            exc_info=True,
        )
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/model/__pycache__/clip_model.cpython-313.pyc
CHANGED
|
Binary files a/src/model/__pycache__/clip_model.cpython-313.pyc and b/src/model/__pycache__/clip_model.cpython-313.pyc differ
|
|
|