traopia committed on
Commit
44b94fe
·
1 Parent(s): 08d32ab

reorganizing

Browse files
Files changed (9) hide show
  1. .DS_Store +0 -0
  2. .gitignore +4 -0
  3. README.md +2 -2
  4. __pycache__/search.cpython-311.pyc +0 -0
  5. app_old.py +0 -118
  6. app_onetab.py +0 -223
  7. gradio_app1.py +0 -110
  8. playground.ipynb +0 -23
  9. playground.py +0 -23
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitignore CHANGED
@@ -1 +1,5 @@
1
  chroma_db/
 
 
 
 
 
1
  chroma_db/
2
+ .gradio/
3
+ src/
4
+ src1/
5
+ old_app_code/
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Ask FashionDB
3
- emoji: 🐠
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
 
1
  ---
2
+ title: High Fashion Explorer
3
+ emoji: 🧵
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
__pycache__/search.cpython-311.pyc ADDED
Binary file (3.25 kB). View file
 
app_old.py DELETED
@@ -1,118 +0,0 @@
1
- import gradio as gr
2
- #example just for fun
3
- from src.visual_qa import main_text_retrieve_images
4
- from src.generate_queries_alternative import main_generate_queries
5
- import time
6
- import pandas as pd
7
-
8
- import spacy
9
-
10
- # Try to load the model, and download it if missing
11
- try:
12
- nlp = spacy.load("en_core_web_sm")
13
- except OSError:
14
- from spacy.cli import download
15
- download("en_core_web_sm")
16
- nlp = spacy.load("en_core_web_sm")
17
-
18
-
19
-
20
- def handle_structured_query(question, sort_by=""):
21
- if not question:
22
- return "Please ask something 🙂", pd.DataFrame(), []
23
-
24
- try:
25
- start = time.time()
26
- result_query, sparql_query = main_generate_queries(question)
27
- elapsed = round(time.time() - start, 2)
28
- except Exception as e:
29
- return f"⚠️ Query failed: {e}", pd.DataFrame(), []
30
-
31
- if isinstance(result_query, str):
32
- return result_query, pd.DataFrame(), []
33
-
34
- if not result_query:
35
- return f"No results for '{question}'. Try rephrasing. (⏱ {elapsed}s)", pd.DataFrame(), []
36
-
37
- df = pd.DataFrame(result_query)
38
- if sort_by and sort_by in df.columns:
39
- df = df.sort_values(by=sort_by)
40
-
41
-
42
- if "image_url" in df.columns:
43
- columns_of_interest = ["image_url", "year","fashion_collectionLabel", "reference_URL"]
44
- df = df[columns_of_interest]
45
- # Create a gallery: each item is (image_url, metadata string)
46
- gallery_items = []
47
- for _, row in df.iterrows():
48
- image_url = row.get("image_url")
49
- if not image_url:
50
- continue
51
- # Caption from other fields
52
- caption = " | ".join(f"{k}: {v}" for k, v in row.items() if k != "image_url" and pd.notnull(v))
53
- gallery_items.append((image_url, caption))
54
- return f"Query returned {len(gallery_items)} image(s) in {elapsed} seconds.", pd.DataFrame(), gallery_items
55
-
56
- return f"Query returned a table with {len(df)} row(s) in {elapsed} seconds.", df, []
57
-
58
-
59
-
60
-
61
- def handle_image_query(text):
62
- if not text:
63
- return []
64
-
65
- try:
66
- records = main_text_retrieve_images(text)
67
- print(f"Retrieved {len(records)} records for query: {text}")
68
- print(records)
69
- except Exception as e:
70
- return [("https://via.placeholder.com/300x200?text=Error", f"Error: {e}")]
71
-
72
- gallery_items = []
73
- for item in records:
74
- image_url = item.get("image_url")
75
- if not image_url:
76
- continue
77
- # Build a simple caption from the remaining fields
78
- caption = " | ".join(f"{k}: {v}" for k, v in item.items() if k != "image_url")
79
- gallery_items.append((image_url, caption))
80
-
81
- return gallery_items
82
-
83
- # --- UI --- #
84
- with gr.Blocks() as demo:
85
- gr.Markdown("# 🧵 FashionDB Interface")
86
-
87
-
88
- with gr.Tab("Structured Query"):
89
- gr.Markdown("Ask FashionDB anything and view results with images + metadata.")
90
-
91
- with gr.Row():
92
- query_input = gr.Textbox(label="Your question")
93
- sort_input = gr.Textbox(label="Sort by (optional column name)", placeholder="e.g. year")
94
-
95
- query_submit = gr.Button("Submit")
96
-
97
- query_text_output = gr.Textbox(label="Message", interactive=False)
98
- query_table_output = gr.Dataframe(label="Tabular Result", interactive=False)
99
- query_gallery_output = gr.Gallery(label="Image Gallery")
100
- query_submit.click(
101
- fn=handle_structured_query,
102
- inputs=[query_input, sort_input],
103
- outputs=[
104
- query_text_output,
105
- query_table_output,
106
- query_gallery_output
107
- ]
108
- )
109
-
110
- with gr.Tab("Image Retrieval"):
111
- gr.Markdown("Search for similar fashion show images based on a text description.")
112
- image_text = gr.Textbox(label="Describe the kind of images you're looking for")
113
- image_submit = gr.Button("Find Images")
114
- image_gallery = gr.Gallery(label="Retrieved Images")
115
-
116
- image_submit.click(handle_image_query, inputs=image_text, outputs=image_gallery)
117
-
118
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_onetab.py DELETED
@@ -1,223 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import numpy as np
4
- from search import search_images_by_text, get_similar_images, search_images_by_image
5
- import requests
6
- from io import BytesIO
7
-
8
- def create_collection_url(row):
9
- base_url = "https://www.vogue.com/fashion-shows/"
10
- season = str(row["season"]).lower()
11
- year = str(row["year"])
12
- category = str(row["category"]).lower() if pd.notna(row["category"]) and row["category"] and str(row["category"]).lower() != "nan" else None
13
- designer = str(row["designer"]).lower().replace(" ", "-")
14
-
15
- # Add city if available
16
- city = str(row["city"]).lower().replace(" ", "-") if pd.notna(row["city"]) and row["city"] and str(row["city"]).lower() != "nan" else None
17
-
18
- if pd.isna(category) or category is None or category == "nan":
19
- if city:
20
- return f"{base_url}{city}-{season}-{year}/{designer}"
21
- else:
22
- return f"{base_url}{season}-{year}/{designer}"
23
- else:
24
- if city:
25
- return f"{base_url}{city}-{season}-{year}-{category}/{designer}"
26
- else:
27
- return f"{base_url}{season}-{year}-{category}/{designer}"
28
-
29
- import requests
30
- from io import BytesIO
31
- #@st.cache_data(show_spinner="Loading FashionDB...")
32
- def load_data_hf():
33
- # Load the Parquet file directly from Hugging Face
34
- df_url = "https://huggingface.co/datasets/traopia/vogue_runway_small/resolve/main/VogueRunway.parquet"
35
- df = pd.read_parquet(df_url)
36
-
37
- # Load the .npy file using requests
38
- npy_url = "https://huggingface.co/datasets/traopia/vogue_runway_small/resolve/main/VogueRunway_image.npy"
39
- response = requests.get(npy_url)
40
- response.raise_for_status() # Raise error if download fails
41
- embeddings = np.load(BytesIO(response.content))
42
- df['collection'] = df.apply(create_collection_url, axis=1)
43
- return df, embeddings
44
-
45
-
46
- # from huggingface_hub import hf_hub_download
47
- # def load_data1():
48
- # # Login using e.g. `huggingface-cli login` to access this dataset
49
- # path = hf_hub_download(
50
- # repo_id="traopia/fashion_show_data_all_embeddings",
51
- # filename="fashion_show_data_all_embeddings.json"
52
- # )
53
- # df = pd.read_json(path, lines = True)
54
-
55
- # #df = pd.read_json("hf://datasets/traopia/fashion_show_data_all_embeddings.json/fashion_show_data_all_embeddings.json", lines=True)
56
- # df["fashion_clip_image"] = df["fashion_clip_image"].apply(lambda x: x[0] if isinstance(x, list) else x)
57
- # df["image_urls"] = df["image_urls"].apply(lambda x: x[0] if x is not None else None)
58
- # df = df.rename(columns={"fashion_house":"designer", "image_urls":"url", "URL":"collection"})
59
-
60
- # df = df.dropna(subset="fashion_clip_image")
61
- # df = df.reset_index(drop=True)
62
- # df["key"] = df.index
63
- # embeddings = np.vstack(df["fashion_clip_image"].values)
64
-
65
- # return df, embeddings
66
-
67
- df, embeddings = load_data_hf()
68
-
69
- # Filter and search
70
- def filter_and_search(fashion_house, category, season, start_year, end_year, query):
71
- filtered = df.copy()
72
-
73
- if fashion_house:
74
- filtered = filtered[filtered['designer'].isin(fashion_house)]
75
- if category:
76
- filtered = filtered[filtered['category'].isin(category)]
77
- if season:
78
- filtered = filtered[filtered['season'].isin(season)]
79
- filtered = filtered[(filtered['year'] >= start_year) & (filtered['year'] <= end_year)]
80
-
81
- if query:
82
- results = search_images_by_text(query, filtered, embeddings)
83
- else:
84
- results = filtered.head(30)
85
-
86
- image_urls = results["url"].tolist()
87
- metadata = results.to_dict(orient="records")
88
- return image_urls, metadata
89
-
90
- # Display metadata and similar
91
- def show_metadata(idx, metadata):
92
- item = metadata[idx]
93
- out = ""
94
- for field in ["designer", "season", "year", "category"]:
95
- if field in item and pd.notna(item[field]):
96
- out += f"**{field.title()}**: {item[field]}\n"
97
- if 'collection' in item and pd.notna(item['collection']):
98
- out += f"\n[View Collection]({item['collection']})"
99
- return out
100
-
101
- def find_similar(idx, metadata):
102
- if not isinstance(idx, int) or idx >= len(metadata) or idx < 0:
103
- return [] # or gr.update(visible=False)
104
- key = metadata[idx]["key"]
105
- similar_df = get_similar_images(df, key, embeddings, top_k=5)
106
- return similar_df["url"].tolist(), similar_df.to_dict(orient="records")
107
-
108
-
109
-
110
- # Gradio UI
111
- with gr.Blocks() as demo:
112
- gr.Markdown("# 👗 FashionDB Explorer")
113
-
114
- with gr.Row():
115
- fashion_house = gr.Dropdown(label="Fashion House", choices=sorted(df["designer"].dropna().unique()), multiselect=True)
116
- category = gr.Dropdown(label="Category", choices=sorted(df["category"].dropna().unique()), multiselect=True)
117
- season = gr.Dropdown(label="Season", choices=sorted(df["season"].dropna().unique()), multiselect=True)
118
-
119
-
120
- min_year = int(df['year'].min())
121
- max_year = int(df['year'].max())
122
-
123
- start_year = gr.Slider(label="Start Year", minimum=min_year, maximum=max_year, value=2000, step=1)
124
- end_year = gr.Slider(label="End Year", minimum=min_year, maximum=max_year, value=2024, step=1)
125
-
126
- query = gr.Textbox(label="Search by text", placeholder="(optional): e.g., pink dress ")
127
- search_button = gr.Button("Search by text")
128
-
129
- uploaded_image = gr.Image(label="Upload an image", type="pil") # or type="pil" if you prefer PIL Image object
130
- search_by_image_button = gr.Button("Search by Image")
131
-
132
- def handle_search_by_image(uploaded_image):
133
- if uploaded_image is None:
134
- return [], "Please upload an image first."
135
- results_df = search_images_by_image(uploaded_image, df, embeddings)
136
- # Convert results DataFrame to image URLs (or paths) for gallery display
137
- images = results_df['url'].tolist()
138
- metadata = results_df.to_dict(orient='records')
139
- return images, metadata, ""
140
-
141
- uploaded_metadata_state = gr.State([])
142
- uploaded_metadata_output = gr.Markdown()
143
- uploaded_result_gallery = gr.Gallery(label="Search Results by Image", columns=5, height="auto")
144
-
145
- search_by_image_button.click(
146
- fn=handle_search_by_image,
147
- inputs=[uploaded_image],
148
- outputs=[uploaded_result_gallery, uploaded_metadata_state, uploaded_metadata_output]
149
- )
150
-
151
- result_gallery = gr.Gallery(label="Search Results", columns=5, height="auto")
152
- metadata_output = gr.Markdown()
153
- reference_image = gr.Image(label="Reference Image", interactive=False)
154
- similar_gallery = gr.Gallery(label="Similar Images", columns = 5, height="auto")
155
-
156
- metadata_state = gr.State([])
157
- selected_idx = gr.Number(value=0, visible=False)
158
-
159
- def handle_search(*args):
160
- imgs, meta = filter_and_search(*args)
161
- return imgs, meta, "", []
162
-
163
- search_button.click(
164
- handle_search,
165
- inputs=[fashion_house, category, season, start_year, end_year, query],
166
- outputs=[result_gallery, metadata_state, metadata_output, similar_gallery]
167
- )
168
-
169
-
170
- def handle_click(evt: gr.SelectData, metadata):
171
- idx = evt.index
172
- md = show_metadata(idx, metadata)
173
- img_path = metadata[idx]["url"]
174
- return idx, md, img_path
175
-
176
-
177
-
178
- result_gallery.select(
179
- handle_click,
180
- inputs=[metadata_state],
181
- outputs=[selected_idx, metadata_output, reference_image]
182
- )
183
-
184
- def show_similar(idx, metadata):
185
- if idx is None or not str(idx).isdigit():
186
- return [],[] # safe fallback
187
- return find_similar(int(idx), metadata)
188
-
189
- similar_metadata_state = gr.State()
190
- similar_metadata_output = gr.Markdown()
191
-
192
- show_similar_button = gr.Button("Show Similar Images")
193
- show_similar_button.click(
194
- show_similar,
195
- inputs=[selected_idx, metadata_state],
196
- outputs=[similar_gallery, similar_metadata_state]
197
- )
198
-
199
-
200
- def handle_similar_click(evt: gr.SelectData, metadata):
201
- idx = evt.index
202
- md = show_metadata(idx, metadata)
203
- img_path = metadata[idx]["url"]
204
- return idx, md, img_path
205
-
206
-
207
- similar_gallery.select(
208
- handle_similar_click,
209
- inputs=[similar_metadata_state],
210
- outputs=[selected_idx, similar_metadata_output, reference_image]
211
- )
212
-
213
- back_button = gr.Button("Back to Home")
214
-
215
- def back_to_home():
216
- return [], "", None # clear similar_gallery, metadata_output, reference image
217
-
218
- back_button.click(
219
- back_to_home,
220
- outputs=[similar_gallery, similar_metadata_output, reference_image]
221
- )
222
-
223
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gradio_app1.py DELETED
@@ -1,110 +0,0 @@
1
- import gradio as gr
2
-
3
- # --- Handlers --- #
4
- from src1.generate_queries_alternative import main_generate_queries
5
- import time
6
- import pandas as pd
7
-
8
-
9
-
10
-
11
- def handle_structured_query(question, sort_by=""):
12
- if not question:
13
- return "Please ask something 🙂", pd.DataFrame(), []
14
-
15
- try:
16
- start = time.time()
17
- result_query, sparql_query = main_generate_queries(question)
18
- elapsed = round(time.time() - start, 2)
19
- except Exception as e:
20
- return f"⚠️ Query failed: {e}", pd.DataFrame(), []
21
-
22
- if isinstance(result_query, str):
23
- return result_query, pd.DataFrame(), []
24
-
25
- if not result_query:
26
- return f"No results for '{question}'. Try rephrasing. (⏱ {elapsed}s)", pd.DataFrame(), []
27
-
28
- df = pd.DataFrame(result_query)
29
- if sort_by and sort_by in df.columns:
30
- df = df.sort_values(by=sort_by)
31
-
32
-
33
- if "image_url" in df.columns:
34
- columns_of_interest = ["image_url", "year","fashion_collectionLabel", "reference_URL"]
35
- df = df[columns_of_interest]
36
- # Create a gallery: each item is (image_url, metadata string)
37
- gallery_items = []
38
- for _, row in df.iterrows():
39
- image_url = row.get("image_url")
40
- if not image_url:
41
- continue
42
- # Caption from other fields
43
- caption = " | ".join(f"{k}: {v}" for k, v in row.items() if k != "image_url" and pd.notnull(v))
44
- gallery_items.append((image_url, caption))
45
- return f"Query returned {len(gallery_items)} image(s) in {elapsed} seconds.", pd.DataFrame(), gallery_items
46
-
47
- return f"Query returned a table with {len(df)} row(s) in {elapsed} seconds.", df, []
48
-
49
-
50
-
51
-
52
-
53
- from src1.visual_qa import main_text_retrieve_images
54
-
55
- def handle_image_query(text):
56
- if not text:
57
- return []
58
-
59
- try:
60
- records = main_text_retrieve_images(text)
61
- except Exception as e:
62
- return [("https://via.placeholder.com/300x200?text=Error", f"Error: {e}")]
63
-
64
- gallery_items = []
65
- for item in records:
66
- image_url = item.get("image_url")
67
- if not image_url:
68
- continue
69
- # Build a simple caption from the remaining fields
70
- caption = " | ".join(f"{k}: {v}" for k, v in item.items() if k != "image_url")
71
- gallery_items.append((image_url, caption))
72
-
73
- return gallery_items
74
-
75
- # --- UI --- #
76
- with gr.Blocks() as demo:
77
- gr.Markdown("# 🧵 FashionDB Interface")
78
-
79
-
80
- with gr.Tab("Structured Query"):
81
- gr.Markdown("Ask FashionDB anything and view results with images + metadata.")
82
-
83
- with gr.Row():
84
- query_input = gr.Textbox(label="Your question")
85
- sort_input = gr.Textbox(label="Sort by (optional column name)", placeholder="e.g. start_year")
86
-
87
- query_submit = gr.Button("Submit")
88
-
89
- query_text_output = gr.Textbox(label="Message", interactive=False)
90
- query_table_output = gr.Dataframe(label="Tabular Result", interactive=False)
91
- query_gallery_output = gr.Gallery(label="Image Gallery")
92
- query_submit.click(
93
- fn=handle_structured_query,
94
- inputs=[query_input, sort_input],
95
- outputs=[
96
- query_text_output,
97
- query_table_output,
98
- query_gallery_output
99
- ]
100
- )
101
-
102
- with gr.Tab("Image Retrieval"):
103
- gr.Markdown("Search for similar fashion show images based on a text description.")
104
- image_text = gr.Textbox(label="Describe the kind of images you're looking for")
105
- image_submit = gr.Button("Find Images")
106
- image_gallery = gr.Gallery(label="Retrieved Images")
107
-
108
- image_submit.click(handle_image_query, inputs=image_text, outputs=image_gallery)
109
-
110
- demo.launch( share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
playground.ipynb DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "883693d5",
7
- "metadata": {
8
- "vscode": {
9
- "languageId": "plaintext"
10
- }
11
- },
12
- "outputs": [],
13
- "source": []
14
- }
15
- ],
16
- "metadata": {
17
- "language_info": {
18
- "name": "python"
19
- }
20
- },
21
- "nbformat": 4,
22
- "nbformat_minor": 5
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
playground.py DELETED
@@ -1,23 +0,0 @@
1
- import chromadb
2
- client = chromadb.PersistentClient(path="./chroma_db") # Change path if needed
3
-
4
- # Get a list of existing collection names
5
- existing_collections = [col for col in client.list_collections()]
6
- collection_name = "clip_image_embeddings"
7
- if collection_name in existing_collections:
8
- collection = client.get_collection(name=collection_name)
9
- print(f"Using existing collection: {collection_name}")
10
- print(existing_collections)
11
-
12
- # Show up to 3 items
13
- results = collection.get(limit=3)
14
-
15
- for i in range(len(results["ids"])):
16
- print(f"\nItem {i + 1}:")
17
- print(f"ID: {results['ids'][i]}")
18
- print(f"Document: {results['documents'][i]}")
19
- print(f"Metadata: {results['metadatas'][i]}")
20
-
21
- print("Number of items:", len(collection.get()["ids"]))
22
-
23
- collection_data = collection.get()