Spaces:
Sleeping
Sleeping
traopia committed on
Commit ·
e9b2c9e
1
Parent(s): c57012a
queries and other fixes
Browse files- app_fashionDB.py +116 -9
- example_queries.py +28 -0
- search_fashionDB.py +44 -12
app_fashionDB.py
CHANGED
|
@@ -4,16 +4,18 @@ import numpy as np
|
|
| 4 |
from search_fashionDB import search_images_by_text, get_similar_images, search_images_by_image
|
| 5 |
import requests
|
| 6 |
from io import BytesIO
|
|
|
|
| 7 |
|
| 8 |
import requests
|
| 9 |
from io import BytesIO
|
| 10 |
-
|
| 11 |
|
| 12 |
#@st.cache_data(show_spinner="Loading FashionDB...")
|
| 13 |
def load_data_hf():
|
| 14 |
# Load the Parquet file directly from Hugging Face
|
| 15 |
df_url = "https://huggingface.co/datasets/traopia/FashionDB/resolve/main/data_vogue_final.parquet"
|
| 16 |
df = pd.read_parquet(df_url)
|
|
|
|
| 17 |
df = df.explode("image_urls_sample")
|
| 18 |
df = df.rename(columns={"image_urls_sample":"url", "URL":"collection"})
|
| 19 |
|
|
@@ -39,8 +41,10 @@ df, df_fh, df_designers, embeddings, embeddings_urls = load_data_hf()
|
|
| 39 |
# Suppose embeddings is a numpy array (N, D) and embeddings_urls is a list of urls/keys
|
| 40 |
embedding_map = {url: i for i, url in enumerate(embeddings_urls)}
|
| 41 |
|
|
|
|
| 42 |
# Filter and search
|
| 43 |
-
def filter_and_search(fashion_house, designer, category, season, start_year, end_year, query
|
|
|
|
| 44 |
filtered = df.copy()
|
| 45 |
|
| 46 |
if fashion_house:
|
|
@@ -54,6 +58,30 @@ def filter_and_search(fashion_house, designer, category, season, start_year, end
|
|
| 54 |
filtered = filtered[filtered['season'].isin(season)]
|
| 55 |
filtered = filtered[(filtered['year'] >= start_year) & (filtered['year'] <= end_year)]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
if query:
|
| 58 |
image_urls, metadata = search_images_by_text(query, filtered, embeddings, embeddings_urls)
|
| 59 |
else:
|
|
@@ -104,6 +132,20 @@ with gr.Blocks() as demo:
|
|
| 104 |
start_year = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
|
| 105 |
end_year = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
query = gr.Textbox(label="Search by text", placeholder="e.g., pink dress")
|
| 108 |
search_button = gr.Button("Search")
|
| 109 |
|
|
@@ -115,13 +157,13 @@ with gr.Blocks() as demo:
|
|
| 115 |
metadata_state = gr.State([])
|
| 116 |
selected_idx = gr.Number(value=0, visible=False)
|
| 117 |
|
| 118 |
-
def handle_search(fh, dis, cat, sea, sy, ey, q):
|
| 119 |
-
imgs, meta = filter_and_search(fh, dis, cat, sea, sy, ey, q)
|
| 120 |
return imgs, meta, "", [], None
|
| 121 |
|
| 122 |
search_button.click(
|
| 123 |
handle_search,
|
| 124 |
-
inputs=[fashion_house, designer, category, season, start_year, end_year, query],
|
| 125 |
outputs=[result_gallery, metadata_state, metadata_output, similar_gallery, reference_image]
|
| 126 |
)
|
| 127 |
|
|
@@ -174,6 +216,14 @@ with gr.Blocks() as demo:
|
|
| 174 |
start_year_img = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
|
| 175 |
end_year_img = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
uploaded_image = gr.Image(label="Upload an image", type="pil")
|
| 178 |
search_by_image_button = gr.Button("Search by Image")
|
| 179 |
|
|
@@ -182,23 +232,46 @@ with gr.Blocks() as demo:
|
|
| 182 |
uploaded_metadata_output = gr.Markdown()
|
| 183 |
uploaded_reference_image = gr.Image(label="Reference Image", interactive=False)
|
| 184 |
|
| 185 |
-
def handle_search_by_image(image, fh, dis, cat, sea, sy, ey):
|
| 186 |
if image is None:
|
| 187 |
return [], "Please upload an image first.", None
|
| 188 |
# Apply filters
|
| 189 |
filtered_df = df.copy()
|
| 190 |
if fh: filtered_df = filtered_df[filtered_df["fashion_house"].isin(fh)]
|
| 191 |
-
if dis: filtered_df = filtered_df[filtered_df["designer_name"].isin(
|
| 192 |
if cat: filtered_df = filtered_df[filtered_df["category"].isin(cat)]
|
| 193 |
if sea: filtered_df = filtered_df[filtered_df["season"].isin(sea)]
|
| 194 |
filtered_df = filtered_df[(filtered_df["year"] >= sy) & (filtered_df["year"] <= ey)]
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
images, metadata = search_images_by_image(image, filtered_df, embeddings, embeddings_urls)
|
| 197 |
return images, metadata, ""
|
| 198 |
|
| 199 |
search_by_image_button.click(
|
| 200 |
handle_search_by_image,
|
| 201 |
-
inputs=[uploaded_image, fashion_house_img, designer_img, category_img, season_img, start_year_img, end_year_img],
|
| 202 |
outputs=[uploaded_result_gallery, uploaded_metadata_state, uploaded_metadata_output]
|
| 203 |
)
|
| 204 |
|
|
@@ -257,12 +330,46 @@ with gr.Blocks() as demo:
|
|
| 257 |
)
|
| 258 |
|
| 259 |
with gr.Tab("Query on FashionDB"):
|
| 260 |
-
|
|
|
|
|
|
|
| 261 |
gr.Markdown(
|
| 262 |
"### 🔗 Query FashionDB SPARQL Endpoint\n"
|
| 263 |
"[Click here to open the SPARQL endpoint](https://fashionwiki.wikibase.cloud/query/)",
|
| 264 |
elem_id="sparql-link"
|
| 265 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
back_button = gr.Button("Back to Home")
|
| 268 |
|
|
|
|
| 4 |
from search_fashionDB import search_images_by_text, get_similar_images, search_images_by_image
|
| 5 |
import requests
|
| 6 |
from io import BytesIO
|
| 7 |
+
import urllib.parse
|
| 8 |
|
| 9 |
import requests
|
| 10 |
from io import BytesIO
|
| 11 |
+
from example_queries import EXAMPLE_QUERIES
|
| 12 |
|
| 13 |
#@st.cache_data(show_spinner="Loading FashionDB...")
|
| 14 |
def load_data_hf():
|
| 15 |
# Load the Parquet file directly from Hugging Face
|
| 16 |
df_url = "https://huggingface.co/datasets/traopia/FashionDB/resolve/main/data_vogue_final.parquet"
|
| 17 |
df = pd.read_parquet(df_url)
|
| 18 |
+
df = df.drop_duplicates(subset=["URL"])
|
| 19 |
df = df.explode("image_urls_sample")
|
| 20 |
df = df.rename(columns={"image_urls_sample":"url", "URL":"collection"})
|
| 21 |
|
|
|
|
| 41 |
# Suppose embeddings is a numpy array (N, D) and embeddings_urls is a list of urls/keys
|
| 42 |
embedding_map = {url: i for i, url in enumerate(embeddings_urls)}
|
| 43 |
|
| 44 |
+
|
| 45 |
# Filter and search
|
| 46 |
+
def filter_and_search(fashion_house, designer, category, season, start_year, end_year, query,
|
| 47 |
+
fh_country, fh_city, designer_nationality, designer_birth_year_start, designer_birth_year_end):
|
| 48 |
filtered = df.copy()
|
| 49 |
|
| 50 |
if fashion_house:
|
|
|
|
| 58 |
filtered = filtered[filtered['season'].isin(season)]
|
| 59 |
filtered = filtered[(filtered['year'] >= start_year) & (filtered['year'] <= end_year)]
|
| 60 |
|
| 61 |
+
# Fashion house filters via df_fh (country, city)
|
| 62 |
+
if (fh_country and len(fh_country) > 0) or (fh_city and len(fh_city) > 0):
|
| 63 |
+
fh_cols = [c for c in ['fashion_house', 'country', 'city'] if c in df_fh.columns]
|
| 64 |
+
if 'fashion_house' in fh_cols:
|
| 65 |
+
merged = filtered.merge(df_fh[fh_cols], on='fashion_house', how='left')
|
| 66 |
+
if fh_country and 'country' in merged.columns:
|
| 67 |
+
merged = merged[merged['country'].isin(fh_country)]
|
| 68 |
+
if fh_city and 'city' in merged.columns:
|
| 69 |
+
merged = merged[merged['city'].isin(fh_city)]
|
| 70 |
+
filtered = merged.drop_duplicates(subset=['url'])
|
| 71 |
+
|
| 72 |
+
# Designer filters via df_designers (nationality, year_birth)
|
| 73 |
+
if (designer_nationality and len(designer_nationality) > 0) or (designer_birth_year_start is not None or designer_birth_year_end is not None):
|
| 74 |
+
des_cols = [c for c in ['designer_name', 'nationality', 'year_birth'] if c in df_designers.columns]
|
| 75 |
+
if 'designer_name' in des_cols:
|
| 76 |
+
merged = filtered.merge(df_designers[des_cols], on='designer_name', how='left')
|
| 77 |
+
if designer_nationality and 'nationality' in merged.columns:
|
| 78 |
+
merged = merged[merged['nationality'].isin(designer_nationality)]
|
| 79 |
+
if (designer_birth_year_start is not None or designer_birth_year_end is not None) and 'year_birth' in merged.columns:
|
| 80 |
+
by_start = designer_birth_year_start if designer_birth_year_start is not None else merged['year_birth'].min()
|
| 81 |
+
by_end = designer_birth_year_end if designer_birth_year_end is not None else merged['year_birth'].max()
|
| 82 |
+
merged = merged[(merged['year_birth'] >= by_start) & (merged['year_birth'] <= by_end)]
|
| 83 |
+
filtered = merged.drop_duplicates(subset=['url'])
|
| 84 |
+
|
| 85 |
if query:
|
| 86 |
image_urls, metadata = search_images_by_text(query, filtered, embeddings, embeddings_urls)
|
| 87 |
else:
|
|
|
|
| 132 |
start_year = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
|
| 133 |
end_year = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)
|
| 134 |
|
| 135 |
+
# Additional filters banner for Fashion House and Designer metadata
|
| 136 |
+
with gr.Row():
|
| 137 |
+
fh_countries = sorted(df_fh['country'].dropna().unique()) if 'country' in df_fh.columns else []
|
| 138 |
+
fh_cities = sorted(df_fh['city'].dropna().unique()) if 'city' in df_fh.columns else []
|
| 139 |
+
designer_places = sorted(df_designers['nationality'].dropna().unique()) if 'nationality' in df_designers.columns else []
|
| 140 |
+
birth_year_min = int(df_designers['year_birth'].min()) if 'year_birth' in df_designers.columns else 1900
|
| 141 |
+
birth_year_max = int(df_designers['year_birth'].max()) if 'year_birth' in df_designers.columns else 2024
|
| 142 |
+
|
| 143 |
+
fh_country = gr.Dropdown(label="Country of Fashion House", choices=fh_countries, multiselect=True)
|
| 144 |
+
fh_city = gr.Dropdown(label="HQ of Fashion House", choices=fh_cities, multiselect=True)
|
| 145 |
+
designer_nationality = gr.Dropdown(label="Designer Nationality", choices=designer_places, multiselect=True)
|
| 146 |
+
designer_birth_year_start = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_min, step=1, label="Designer Birth Year Start")
|
| 147 |
+
designer_birth_year_end = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_max, step=1, label="Designer Birth Year End")
|
| 148 |
+
|
| 149 |
query = gr.Textbox(label="Search by text", placeholder="e.g., pink dress")
|
| 150 |
search_button = gr.Button("Search")
|
| 151 |
|
|
|
|
| 157 |
metadata_state = gr.State([])
|
| 158 |
selected_idx = gr.Number(value=0, visible=False)
|
| 159 |
|
| 160 |
+
def handle_search(fh, dis, cat, sea, sy, ey, q, fh_co, fh_ci, d_pob, d_by_start, d_by_end):
    """Run the filtered text search and reset the detail widgets.

    All arguments are forwarded unchanged, in order, to ``filter_and_search``.

    Returns a 5-tuple lined up with the click handler's outputs: the result
    images, their metadata, an empty metadata text, an empty similar-images
    list and no reference image.
    """
    gallery_items, records = filter_and_search(
        fh, dis, cat, sea, sy, ey, q,
        fh_co, fh_ci, d_pob, d_by_start, d_by_end,
    )
    # A new search clears whatever was shown for the previous selection.
    return gallery_items, records, "", [], None
|
| 163 |
|
| 164 |
search_button.click(
|
| 165 |
handle_search,
|
| 166 |
+
inputs=[fashion_house, designer, category, season, start_year, end_year, query, fh_country, fh_city, designer_nationality, designer_birth_year_start, designer_birth_year_end],
|
| 167 |
outputs=[result_gallery, metadata_state, metadata_output, similar_gallery, reference_image]
|
| 168 |
)
|
| 169 |
|
|
|
|
| 216 |
start_year_img = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
|
| 217 |
end_year_img = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)
|
| 218 |
|
| 219 |
+
# Additional banner for FH/Designer filters in image search
|
| 220 |
+
with gr.Row():
|
| 221 |
+
fh_country_img = gr.Dropdown(label="Country of Fashion House", choices=fh_countries, multiselect=True)
|
| 222 |
+
fh_city_img = gr.Dropdown(label="HQ of Fashion House", choices=fh_cities, multiselect=True)
|
| 223 |
+
designer_nationality_img = gr.Dropdown(label="Designer Nationality", choices=designer_places, multiselect=True)
|
| 224 |
+
designer_birth_year_start_img = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_min, step=1, label="Designer Birth Year Start")
|
| 225 |
+
designer_birth_year_end_img = gr.Slider(minimum=birth_year_min, maximum=birth_year_max, value=birth_year_max, step=1, label="Designer Birth Year End")
|
| 226 |
+
|
| 227 |
uploaded_image = gr.Image(label="Upload an image", type="pil")
|
| 228 |
search_by_image_button = gr.Button("Search by Image")
|
| 229 |
|
|
|
|
| 232 |
uploaded_metadata_output = gr.Markdown()
|
| 233 |
uploaded_reference_image = gr.Image(label="Reference Image", interactive=False)
|
| 234 |
|
| 235 |
+
def handle_search_by_image(image, fh, dis, cat, sea, sy, ey, fh_co, fh_ci, d_pob, d_by_start, d_by_end):
    """Search for images similar to an uploaded one, restricted by the filters.

    Parameters:
        image: the uploaded query image, or None when nothing was uploaded.
        fh, dis, cat, sea: selected fashion houses, designers, categories and
            seasons; an empty or None selection disables that filter.
        sy, ey: inclusive bounds on the ``year`` column.
        fh_co, fh_ci: selected fashion-house countries / cities, matched by
            joining ``df_fh`` on ``fashion_house``.
        d_pob: selected designer nationalities, matched by joining
            ``df_designers`` on ``designer_name``.
        d_by_start, d_by_end: inclusive bounds on designer ``year_birth``;
            None leaves that side of the range open.

    Returns:
        A 3-tuple ``(images, metadata, message)`` in the same order as the
        click handler's outputs (result gallery, metadata state, markdown).
    """
    if image is None:
        # Keep the tuple aligned with the outputs: the hint belongs in the
        # markdown slot and the metadata state has to stay a list, exactly
        # like the success path below.
        return [], [], "Please upload an image first."

    # Basic column filters on the main table.
    filtered_df = df.copy()
    if fh:
        filtered_df = filtered_df[filtered_df["fashion_house"].isin(fh)]
    if dis:
        filtered_df = filtered_df[filtered_df["designer_name"].isin(dis)]
    if cat:
        filtered_df = filtered_df[filtered_df["category"].isin(cat)]
    if sea:
        filtered_df = filtered_df[filtered_df["season"].isin(sea)]
    filtered_df = filtered_df[(filtered_df["year"] >= sy) & (filtered_df["year"] <= ey)]

    # Fashion-house metadata (country, city) lives in df_fh, so join it in
    # only when one of those filters is actually set.
    if fh_co or fh_ci:
        fh_cols = [c for c in ['fashion_house', 'country', 'city'] if c in df_fh.columns]
        if 'fashion_house' in fh_cols:
            merged = filtered_df.merge(df_fh[fh_cols], on='fashion_house', how='left')
            if fh_co and 'country' in merged.columns:
                merged = merged[merged['country'].isin(fh_co)]
            if fh_ci and 'city' in merged.columns:
                merged = merged[merged['city'].isin(fh_ci)]
            # The join can repeat an image when df_fh has several rows per house.
            filtered_df = merged.drop_duplicates(subset=['url'])

    # Designer metadata (nationality, year_birth) lives in df_designers.
    # NOTE(review): the birth-year sliders always supply a value, so this
    # branch runs on every UI search and rows whose designer has no
    # year_birth are dropped (NaN fails both comparisons) - confirm intended.
    birth_range_given = d_by_start is not None or d_by_end is not None
    if d_pob or birth_range_given:
        des_cols = [c for c in ['designer_name', 'nationality', 'year_birth'] if c in df_designers.columns]
        if 'designer_name' in des_cols:
            merged = filtered_df.merge(df_designers[des_cols], on='designer_name', how='left')
            if d_pob and 'nationality' in merged.columns:
                merged = merged[merged['nationality'].isin(d_pob)]
            if birth_range_given and 'year_birth' in merged.columns:
                # An open side of the range falls back to the data's own extreme.
                by_start = d_by_start if d_by_start is not None else merged['year_birth'].min()
                by_end = d_by_end if d_by_end is not None else merged['year_birth'].max()
                merged = merged[(merged['year_birth'] >= by_start) & (merged['year_birth'] <= by_end)]
            filtered_df = merged.drop_duplicates(subset=['url'])

    images, metadata = search_images_by_image(image, filtered_df, embeddings, embeddings_urls)
    return images, metadata, ""
|
| 271 |
|
| 272 |
search_by_image_button.click(
|
| 273 |
handle_search_by_image,
|
| 274 |
+
inputs=[uploaded_image, fashion_house_img, designer_img, category_img, season_img, start_year_img, end_year_img, fh_country_img, fh_city_img, designer_nationality_img, designer_birth_year_start_img, designer_birth_year_end_img],
|
| 275 |
outputs=[uploaded_result_gallery, uploaded_metadata_state, uploaded_metadata_output]
|
| 276 |
)
|
| 277 |
|
|
|
|
| 330 |
)
|
| 331 |
|
| 332 |
with gr.Tab("Query on FashionDB"):
|
| 333 |
+
|
| 334 |
+
# Front-page SPARQL query UI and examples
|
| 335 |
+
with gr.Accordion("Query FashionDB (SPARQL)", open=True):
|
| 336 |
gr.Markdown(
|
| 337 |
"### 🔗 Query FashionDB SPARQL Endpoint\n"
|
| 338 |
"[Click here to open the SPARQL endpoint](https://fashionwiki.wikibase.cloud/query/)",
|
| 339 |
elem_id="sparql-link"
|
| 340 |
)
|
| 341 |
+
with gr.Row():
|
| 342 |
+
example_dropdown = gr.Dropdown(label="Example SPARQL Queries", choices=list(EXAMPLE_QUERIES.keys()))
|
| 343 |
+
query_text = gr.Textbox(label="SPARQL Query", lines=10)
|
| 344 |
+
open_link_md = gr.Markdown()
|
| 345 |
+
|
| 346 |
+
def on_example_change(example_key):
    """Load the selected example into the query box and build its editor link.

    Returns a ``(query, markdown_link)`` pair. Both are empty strings when
    nothing is selected or the key is not one of the known examples.
    """
    if example_key and example_key in EXAMPLE_QUERIES:
        sparql = EXAMPLE_QUERIES[example_key].strip()
        # The query travels in the URL fragment, so it is percent-encoded.
        editor_url = "https://fashionwiki.wikibase.cloud/query/#query=" + urllib.parse.quote(sparql)
        return sparql, f"[Open in SPARQL Editor]({editor_url})"
    return "", ""
|
| 353 |
+
|
| 354 |
+
example_dropdown.change(
|
| 355 |
+
on_example_change,
|
| 356 |
+
inputs=[example_dropdown],
|
| 357 |
+
outputs=[query_text, open_link_md]
|
| 358 |
+
)
|
| 359 |
+
|
| 360 |
+
def on_query_change(q):
    """Return a markdown link that opens the typed query in the SPARQL editor.

    Leading and trailing whitespace is ignored; an empty or missing query
    yields an empty string so the link disappears.
    """
    sparql = q.strip() if q else ""
    if sparql:
        # The query travels in the URL fragment, so it is percent-encoded.
        editor_url = "https://fashionwiki.wikibase.cloud/query/#query=" + urllib.parse.quote(sparql)
        return "[Open in SPARQL Editor](" + editor_url + ")"
    return ""
|
| 366 |
+
|
| 367 |
+
query_text.change(
|
| 368 |
+
on_query_change,
|
| 369 |
+
inputs=[query_text],
|
| 370 |
+
outputs=[open_link_md]
|
| 371 |
+
)
|
| 372 |
+
|
| 373 |
|
| 374 |
back_button = gr.Button("Back to Home")
|
| 375 |
|
example_queries.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Example SPARQL queries for FashionDB
|
| 2 |
+
# Maps a human-readable label to the SPARQL query text it stands for.
# Each query is written one source line per query line; adjacent string
# literals are concatenated by Python, so the resulting values are single
# strings with embedded newlines.
EXAMPLE_QUERIES = {
    "which designer were born in 1969":
        "PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>\n"
        "PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>\n"
        "PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/> \n"
        "PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/> \n"
        "PREFIX p: <https://fashionwiki.wikibase.cloud/prop/> \n"
        "PREFIX prov: <http://www.w3.org/ns/prov#> \n"
        "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n"
        "\n"
        "SELECT ?designerLabel ?birthdate WHERE {\n"
        " ?designer wbt:P3 ?birthdate .\n"
        " FILTER (YEAR(?birthdate) = 1969)\n"
        "\n"
        " SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". }\n"
        "}",

    "Which designers studied at Central Saint Martins?":
        "PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>\n"
        "PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>\n"
        "PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/> \n"
        "PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>\n"
        "PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/> \n"
        "PREFIX p: <https://fashionwiki.wikibase.cloud/prop/> \n"
        "PREFIX prov: <http://www.w3.org/ns/prov#> \n"
        "\n"
        "SELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {\n"
        " # Restrict to designers who are instances of fashion designer (Q5)\n"
        "\n"
        " ?fashion_designer wbt:P2 wb:Q5.\n"
        " ?fashion_designer wbt:P9 ?educated_at.\n"
        " ?educated_at rdfs:label 'Central Saint Martins'@en . \n"
        "\n"
        "\n"
        " # Retrieve references from the statement\n"
        " OPTIONAL {\n"
        " ?statement prov:wasDerivedFrom ?reference.\n"
        " ?reference pr:P24 ?reference_URL.\n"
        " }\n"
        "\n"
        " # Retrieve labels for the fashion designer\n"
        " SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \n"
        "} \n"
        "GROUP BY ?fashion_designerLabel \n"
        "ORDER BY ?fashion_designerLabel",
}
|
search_fashionDB.py
CHANGED
|
@@ -80,10 +80,17 @@ def search_images_by_image(uploaded_image, df, embeddings,embeddings_urls, top_
|
|
| 80 |
sims = cosine_similarity([image_emb], embeddings)[0]
|
| 81 |
top_indices = np.argsort(sims)[::-1][:top_k]
|
| 82 |
top_urls = [embeddings_urls[i] for i in top_indices]
|
| 83 |
-
metadata
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
return top_urls,
|
| 87 |
|
| 88 |
|
| 89 |
|
|
@@ -97,16 +104,41 @@ def search_images_by_text(text, df, embeddings, embeddings_urls, top_k=30):
|
|
| 97 |
with torch.no_grad():
|
| 98 |
text_emb = model.get_text_features(**inputs).cpu().numpy()
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
sims = cosine_similarity(text_emb, embeddings_filtered)[0]
|
| 104 |
-
sims = np.asarray(sims).flatten()
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def get_similar_images(df, image_key, embeddings, embedding_map, embeddings_urls, top_k=5):
|
| 112 |
if image_key not in embedding_map:
|
|
|
|
| 80 |
sims = cosine_similarity([image_emb], embeddings)[0]
|
| 81 |
top_indices = np.argsort(sims)[::-1][:top_k]
|
| 82 |
top_urls = [embeddings_urls[i] for i in top_indices]
|
| 83 |
+
# Build metadata in the same order as top_urls
|
| 84 |
+
df_subset = df[df["url"].isin(top_urls)].copy()
|
| 85 |
+
records = df_subset.to_dict(orient="records")
|
| 86 |
+
by_url = {}
|
| 87 |
+
for r in records:
|
| 88 |
+
u = r.get("url")
|
| 89 |
+
if u is not None and u not in by_url:
|
| 90 |
+
by_url[u] = r
|
| 91 |
+
ordered_metadata = [by_url[u] for u in top_urls if u in by_url]
|
| 92 |
|
| 93 |
+
return top_urls, ordered_metadata
|
| 94 |
|
| 95 |
|
| 96 |
|
|
|
|
| 104 |
with torch.no_grad():
|
| 105 |
text_emb = model.get_text_features(**inputs).cpu().numpy()
|
| 106 |
|
| 107 |
+
# Build URL -> index map once per call
|
| 108 |
+
url_to_index = {str(url): idx for idx, url in enumerate(embeddings_urls)}
|
| 109 |
+
# Collect indices of embeddings corresponding to filtered df URLs
|
| 110 |
+
filtered_urls = df["url"].astype(str).tolist()
|
| 111 |
+
filtered_indices = [url_to_index[u] for u in filtered_urls if u in url_to_index]
|
| 112 |
+
|
| 113 |
+
if not filtered_indices:
|
| 114 |
+
return [], []
|
| 115 |
+
|
| 116 |
+
embeddings_filtered = embeddings[filtered_indices]
|
| 117 |
sims = cosine_similarity(text_emb, embeddings_filtered)[0]
|
| 118 |
+
sims = np.asarray(sims).flatten()
|
| 119 |
+
|
| 120 |
+
# Rank within the filtered set
|
| 121 |
+
top_indices_local = np.argsort(sims)[::-1][:top_k]
|
| 122 |
+
# Map local ranks back to URLs in the same order, dedupe while preserving order
|
| 123 |
+
ranked_urls = [embeddings_urls[filtered_indices[i]] for i in top_indices_local]
|
| 124 |
+
seen = set()
|
| 125 |
+
top_urls = []
|
| 126 |
+
for u in ranked_urls:
|
| 127 |
+
if u not in seen:
|
| 128 |
+
seen.add(u)
|
| 129 |
+
top_urls.append(u)
|
| 130 |
+
|
| 131 |
+
# Build metadata in the same order as top_urls
|
| 132 |
+
df_subset = df[df["url"].isin(top_urls)].copy()
|
| 133 |
+
records = df_subset.to_dict(orient="records")
|
| 134 |
+
by_url = {}
|
| 135 |
+
for r in records:
|
| 136 |
+
u = r.get("url")
|
| 137 |
+
if u is not None and u not in by_url:
|
| 138 |
+
by_url[u] = r
|
| 139 |
+
ordered_metadata = [by_url[u] for u in top_urls if u in by_url]
|
| 140 |
+
|
| 141 |
+
return top_urls, ordered_metadata
|
| 142 |
|
| 143 |
def get_similar_images(df, image_key, embeddings, embedding_map, embeddings_urls, top_k=5):
|
| 144 |
if image_key not in embedding_map:
|