DINGOLANI committed on
Commit
7930eed
·
verified ·
1 Parent(s): 3f671f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -24
app.py CHANGED
@@ -1,3 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import kagglehub
3
  from sentence_transformers import SentenceTransformer, util
@@ -7,13 +100,13 @@ import os
7
 
8
  # Download dataset from Kaggle
9
  dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
10
- csv_file = os.path.join(dataset_path, "vestiaire.csv") # Ensure correct filename
11
 
12
  # Load dataset and check column names
13
  df = pd.read_csv(csv_file, nrows=5)
14
  print("Column Names in Dataset:", df.columns)
15
 
16
- # Check column mappings
17
  def get_column_name(possible_names, df):
18
  for name in possible_names:
19
  if name in df.columns:
@@ -23,66 +116,71 @@ def get_column_name(possible_names, df):
23
  # Map column names dynamically
24
  designer_column = get_column_name(["brand_name"], df)
25
  category_column = get_column_name(["product_category"], df)
26
- product_column = get_column_name(["product_name"], df)
27
 
28
  # Load full dataset
29
  df = pd.read_csv(csv_file, nrows=10000)
30
 
31
- designers = df[designer_column].dropna().unique().tolist()
32
- categories = df[category_column].dropna().unique().tolist()
33
- products = df[product_column].dropna().tolist()
34
 
35
- # Merge into one dataset for autocomplete
36
- autocomplete_data = designers + categories + products
37
- autocomplete_data = [str(item).strip('"') for item in autocomplete_data]
38
-
39
- # Encode all items in the dataset into embeddings
40
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
41
  model = SentenceTransformer(model_name)
42
- autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
43
 
44
  # Function to find synonyms dynamically
45
  def find_synonym(word, top_n=1):
46
  query_embedding = model.encode(word, convert_to_tensor=True)
47
- results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
48
- return [autocomplete_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6]
49
 
50
  # Function to correct spellings
51
  def correct_spelling(word):
52
- matches = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=1)
53
  if matches:
54
  best_match, score, _ = matches[0]
55
  if score > 70:
56
  return best_match
57
  return word
58
 
59
- # Autocomplete function with tracking
60
  def autocomplete(query):
61
  if not query.strip():
62
- return "None", "None", "No suggestions found"
63
 
64
  original_query = query.strip()
65
  corrected_query = correct_spelling(original_query)
66
  synonym_query = find_synonym(corrected_query, top_n=1)[0] if corrected_query != original_query else corrected_query
67
 
68
- # Perform fuzzy matching with synonyms included
69
- matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
70
- suggestions = "\n".join([match[0] for match in matches])
 
 
 
 
71
 
 
72
  correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
73
  synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"
74
 
75
- return correction_status, synonym_status, suggestions
76
 
77
  # Gradio UI
78
  with gr.Blocks() as demo:
79
- gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Vestiaire Dataset)")
80
 
81
  query = gr.Textbox(label="Start typing for autocomplete")
82
  correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
83
  synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
84
- suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
 
85
 
86
- query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
 
 
 
 
87
 
88
  demo.launch()
 
 
1
+ Vestiaire Autocomplete
2
+ 1
3
+ 2
4
+ 3
5
+ 4
6
+ 5
7
+ 6
8
+ 7
9
+ 8
10
+ 9
11
+ 10
12
+ 11
13
+ 12
14
+ 13
15
+ 14
16
+ 15
17
+ 16
18
+ 17
19
+ 18
20
+ 19
21
+ 20
22
+ 21
23
+ 22
24
+ 23
25
+ 24
26
+ 25
27
+ 26
28
+ 27
29
+ 28
30
+ 29
31
+ 30
32
+ 31
33
+ 32
34
+ 33
35
+ 34
36
+ 35
37
+ 36
38
+ 37
39
+ 38
40
+ 39
41
+ 40
42
+ 41
43
+ 42
44
+ 43
45
+ 44
46
+ 45
47
+ 46
48
+ 47
49
+ 48
50
+ 49
51
+ 50
52
+ 51
53
+ 52
54
+ 53
55
+ 54
56
+ 55
57
+ 56
58
+ 57
59
+ 58
60
+ 59
61
+ 60
62
+ 61
63
+ 62
64
+ 63
65
+ 64
66
+ 65
67
+ 66
68
+ 67
69
+ 68
70
+ 69
71
+ 70
72
+ 71
73
+ 72
74
+ 73
75
+ 74
76
+ 75
77
+ 76
78
+ 77
79
+ 78
80
+ 79
81
+ 80
82
+ 81
83
+ 82
84
+ 83
85
+ 84
86
+ 85
87
+ 86
88
+ 87
89
+ 88
90
+ 89
91
+ 90
92
+ 91
93
+ 92
94
  import gradio as gr
95
  import kagglehub
96
  from sentence_transformers import SentenceTransformer, util
 
100
 
101
  # Download dataset from Kaggle
102
  dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
103
+ csv_file = os.path.join(dataset_path, "vestiaire.csv")
104
 
105
  # Load dataset and check column names
106
  df = pd.read_csv(csv_file, nrows=5)
107
  print("Column Names in Dataset:", df.columns)
108
 
109
+ # Function to get the correct column name
110
  def get_column_name(possible_names, df):
111
  for name in possible_names:
112
  if name in df.columns:
 
116
  # Map column names dynamically
117
  designer_column = get_column_name(["brand_name"], df)
118
  category_column = get_column_name(["product_category"], df)
 
119
 
120
  # Load full dataset
121
  df = pd.read_csv(csv_file, nrows=10000)
122
 
123
+ # Extract relevant data
124
+ designer_data = df[designer_column].dropna().unique().tolist()
125
+ category_data = df[category_column].dropna().unique().tolist()
126
 
127
+ # Load the model
 
 
 
 
128
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
129
  model = SentenceTransformer(model_name)
 
130
 
131
  # Function to find synonyms dynamically
132
  def find_synonym(word, top_n=1):
133
  query_embedding = model.encode(word, convert_to_tensor=True)
134
+ results = util.semantic_search(query_embedding, model.encode(designer_data + category_data, convert_to_tensor=True), top_k=top_n)
135
+ return [designer_data + category_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6]
136
 
137
  # Function to correct spellings
138
  def correct_spelling(word):
139
+ matches = process.extract(word, designer_data + category_data, scorer=fuzz.partial_ratio, limit=1)
140
  if matches:
141
  best_match, score, _ = matches[0]
142
  if score > 70:
143
  return best_match
144
  return word
145
 
146
+ # Autocomplete function
147
  def autocomplete(query):
148
  if not query.strip():
149
+ return "None", "None", [], []
150
 
151
  original_query = query.strip()
152
  corrected_query = correct_spelling(original_query)
153
  synonym_query = find_synonym(corrected_query, top_n=1)[0] if corrected_query != original_query else corrected_query
154
 
155
+ # Perform fuzzy matching for designers and categories separately
156
+ designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
157
+ category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)
158
+
159
+ # Extract top matches for designers and categories
160
+ designer_suggestions = [match[0] for match in designer_matches]
161
+ category_suggestions = [match[0] for match in category_matches]
162
 
163
+ # Detect if spelling correction or synonym replacement occurred
164
  correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
165
  synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"
166
 
167
+ return correction_status, synonym_status, designer_suggestions, category_suggestions
168
 
169
  # Gradio UI
170
  with gr.Blocks() as demo:
171
+ gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")
172
 
173
  query = gr.Textbox(label="Start typing for autocomplete")
174
  correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
175
  synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
176
+ designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
177
+ category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)
178
 
179
+ query.change(
180
+ fn=autocomplete,
181
+ inputs=query,
182
+ outputs=[correction_output, synonym_output, designer_output, category_output]
183
+ )
184
 
185
  demo.launch()
186
+