Spaces:

DINGOLANI
/

testautosearch

Sleeping

App Files Files Community

DINGOLANI committed on Jan 28, 2025

Commit

7930eed

verified ·

1 Parent(s): 3f671f4

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -24

app.py CHANGED Viewed

@@ -1,3 +1,96 @@
 import gradio as gr
 import kagglehub
 from sentence_transformers import SentenceTransformer, util
@@ -7,13 +100,13 @@ import os
 # Download dataset from Kaggle
 dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
-csv_file = os.path.join(dataset_path, "vestiaire.csv")  # Ensure correct filename
 # Load dataset and check column names
 df = pd.read_csv(csv_file, nrows=5)
 print("Column Names in Dataset:", df.columns)
-# Check column mappings
 def get_column_name(possible_names, df):
     for name in possible_names:
         if name in df.columns:
@@ -23,66 +116,71 @@ def get_column_name(possible_names, df):
 # Map column names dynamically
 designer_column = get_column_name(["brand_name"], df)
 category_column = get_column_name(["product_category"], df)
-product_column = get_column_name(["product_name"], df)
 # Load full dataset
 df = pd.read_csv(csv_file, nrows=10000)
-designers = df[designer_column].dropna().unique().tolist()
-categories = df[category_column].dropna().unique().tolist()
-products = df[product_column].dropna().tolist()
-# Merge into one dataset for autocomplete
-autocomplete_data = designers + categories + products
-autocomplete_data = [str(item).strip('"') for item in autocomplete_data]
-# Encode all items in the dataset into embeddings
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 model = SentenceTransformer(model_name)
-autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
 # Function to find synonyms dynamically
 def find_synonym(word, top_n=1):
     query_embedding = model.encode(word, convert_to_tensor=True)
-    results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
-    return [autocomplete_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6]
 # Function to correct spellings
 def correct_spelling(word):
-    matches = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=1)
     if matches:
         best_match, score, _ = matches[0]
         if score > 70:
             return best_match
     return word
-# Autocomplete function with tracking
 def autocomplete(query):
     if not query.strip():
-        return "None", "None", "No suggestions found"
     original_query = query.strip()
     corrected_query = correct_spelling(original_query)
     synonym_query = find_synonym(corrected_query, top_n=1)[0] if corrected_query != original_query else corrected_query
-    # Perform fuzzy matching with synonyms included
-    matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
-    suggestions = "\n".join([match[0] for match in matches])
     correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
     synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"
-    return correction_status, synonym_status, suggestions
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Vestiaire Dataset)")
     query = gr.Textbox(label="Start typing for autocomplete")
     correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
     synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
-    suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
-    query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
 demo.launch()

+Vestiaire Autocomplete
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
 import gradio as gr
 import kagglehub
 from sentence_transformers import SentenceTransformer, util
 # Download the dataset from Kaggle; the returned path is used below as the
 # directory that contains the CSV file.
 dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
 # Path of the CSV file inside the downloaded dataset directory.
+csv_file = os.path.join(dataset_path, "vestiaire.csv")
 # Read only the first 5 rows: this pass is just to inspect the column names.
 df = pd.read_csv(csv_file, nrows=5)
 print("Column Names in Dataset:", df.columns)
+# Function to get the correct column name
 def get_column_name(possible_names, df):
     for name in possible_names:
         if name in df.columns:
 # Resolve the designer and category column names from the loaded frame.
 # NOTE(review): get_column_name's full body is not visible in this view —
 # confirm what it returns when none of the candidate names is present.
 designer_column = get_column_name(["brand_name"], df)
 category_column = get_column_name(["product_category"], df)
 # Reload the data for real use: only the first 10,000 rows are read,
 # not the entire file.
 df = pd.read_csv(csv_file, nrows=10000)
+# Unique, non-null designer and category values; these two lists are the
+# search corpus used by the matching functions below.
+designer_data = df[designer_column].dropna().unique().tolist()
+category_data = df[category_column].dropna().unique().tolist()
+# Load the sentence-embedding model used by find_synonym.
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 model = SentenceTransformer(model_name)
 # Function to find synonyms dynamically
 def find_synonym(word, top_n=1):
     """Return the designer/category names semantically closest to *word*.

     The query is embedded with the module-level ``model`` and compared
     against the combined ``designer_data + category_data`` corpus using
     ``util.semantic_search``.

     Args:
         word: Query text to embed.
         top_n: Maximum number of corpus hits to request.

     Returns:
         A list (possibly empty) of corpus strings whose similarity score
         is strictly greater than 0.6, best match first.
     """
     # Build the combined corpus once so the index returned by
     # semantic_search refers to the same list it was computed from.
     # (Indexing only category_data and adding a list to the resulting
     # string raised TypeError.)
     corpus = designer_data + category_data
     if not corpus:
         return []

     # Encoding the corpus is expensive; reuse the embeddings across calls
     # as long as the corpus content has not changed.
     cached = getattr(find_synonym, "_corpus_cache", None)
     if cached is None or cached[0] != corpus:
         cached = (corpus, model.encode(corpus, convert_to_tensor=True))
         find_synonym._corpus_cache = cached
     corpus_embeddings = cached[1]

     query_embedding = model.encode(word, convert_to_tensor=True)
     results = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_n)
     return [corpus[hit['corpus_id']] for hit in results[0] if hit['score'] > 0.6]
 # Function to correct spellings
 def correct_spelling(word):
     """Return the best fuzzy match for *word*, or *word* itself.

     The single best candidate from ``designer_data + category_data`` is
     looked up with ``fuzz.partial_ratio``; it replaces *word* only when
     its score is strictly greater than 70.
     """
     candidates = designer_data + category_data
     top_hits = process.extract(word, candidates, scorer=fuzz.partial_ratio, limit=1)
     if not top_hits:
         return word
     candidate, similarity, _ = top_hits[0]
     return candidate if similarity > 70 else word
+# Autocomplete function
 def autocomplete(query):
     """Suggest designers and categories for a partially typed query.

     Args:
         query: Raw text from the input box.

     Returns:
         A 4-tuple ``(correction_status, synonym_status,
         designer_suggestions, category_suggestions)``. The two status
         values are ``"None"`` when no change was applied, otherwise
         ``"before → after"``. The two suggestion values are lists of up
         to 5 matched strings (empty lists for a blank query).
     """
     # Treat a missing value the same as blank input.
     original_query = (query or "").strip()
     if not original_query:
         return "None", "None", [], []

     corrected_query = correct_spelling(original_query)

     # A synonym is only looked up when spelling correction changed the
     # query. find_synonym may return no hit above its score threshold, so
     # fall back to the corrected query instead of indexing an empty list.
     synonym_query = corrected_query
     if corrected_query != original_query:
         synonyms = find_synonym(corrected_query, top_n=1)
         if synonyms:
             synonym_query = synonyms[0]

     # Fuzzy-match designers and categories separately.
     designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
     category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)
     designer_suggestions = [match[0] for match in designer_matches]
     category_suggestions = [match[0] for match in category_matches]

     # Report whether spelling correction or synonym replacement occurred.
     correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
     synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"
     return correction_status, synonym_status, designer_suggestions, category_suggestions
 # Gradio UI: one input textbox and four read-only output boxes.
 with gr.Blocks() as demo:
+    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")
     query = gr.Textbox(label="Start typing for autocomplete")
     correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
     synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
     # NOTE(review): autocomplete returns lists for the two suggestion
     # outputs, while these are Textbox components — confirm how the lists
     # are rendered.
+    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
+    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)
     # Register autocomplete on the query box's change event; its four
     # return values are routed to the four outputs in the order listed.
+    query.change(
+        fn=autocomplete,
+        inputs=query,
+        outputs=[correction_output, synonym_output, designer_output, category_output]
+    )
 demo.launch()