Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import kagglehub | |
| from sentence_transformers import SentenceTransformer, util | |
| import pandas as pd | |
| from rapidfuzz import fuzz, process | |
| import os | |
# Fetch the Vestiaire fashion dataset from Kaggle and locate its CSV file.
dataset_path = kagglehub.dataset_download(
    "justinpakzad/vestiaire-fashion-dataset"
)
csv_file = os.path.join(dataset_path, "vestiaire.csv")

# Read just the first five rows so the available column names can be
# inspected and printed cheaply.
df = pd.read_csv(csv_file, nrows=5)
print("Column Names in Dataset:", df.columns)
# Resolve which of several candidate column names the dataset actually uses.
def get_column_name(possible_names, df):
    """Return the first entry of ``possible_names`` that is a column of ``df``.

    Candidates are tried in the order given and the search stops at the
    first one found in ``df.columns``.

    Raises:
        KeyError: If none of the candidates is present in ``df.columns``.
    """
    # Private sentinel so that any candidate value (even a falsy one) can be
    # told apart from "nothing matched".
    not_found = object()
    chosen = next(
        (candidate for candidate in possible_names if candidate in df.columns),
        not_found,
    )
    if chosen is not_found:
        raise KeyError(f"None of the expected column names {possible_names} found in dataset. Available columns: {df.columns}")
    return chosen
# Resolve the designer and category column names against the preview frame.
designer_column = get_column_name(["brand_name"], df)
category_column = get_column_name(["product_category"], df)

# Reload the dataset, this time keeping the first 10,000 rows.
df = pd.read_csv(csv_file, nrows=10000)


def _distinct_values(column):
    """Return the unique non-null values of ``df[column]`` as a list."""
    return df[column].dropna().unique().tolist()


# Vocabularies that the matching functions search over.
designer_data = _distinct_values(designer_column)
category_data = _distinct_values(category_column)

# Sentence-embedding model used for the semantic (synonym) lookup.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)
# Semantic lookup of the closest designer/category name, with an empty fallback.

# Cache of the encoded vocabulary. Re-encoding every designer and category
# name on each call is wasteful because the vocabulary does not change
# between calls; only the short query needs encoding each time.
_corpus_cache = {"corpus": None, "embeddings": None}


def _get_corpus_embeddings(corpus):
    """Return embeddings for ``corpus``, re-encoding only when it has changed."""
    if _corpus_cache["corpus"] != corpus:
        _corpus_cache["embeddings"] = model.encode(corpus, convert_to_tensor=True)
        _corpus_cache["corpus"] = corpus
    return _corpus_cache["embeddings"]


def find_synonym(word, top_n=1, min_score=0.6):
    """Return the vocabulary entries most semantically similar to ``word``.

    Args:
        word: Query text to embed and compare against the vocabulary.
        top_n: Maximum number of nearest entries to consider.
        min_score: Similarity score a hit must exceed to be returned
            (defaults to the previously hard-coded 0.6).

    Returns:
        A list of designer/category names, in the order reported by
        ``util.semantic_search``. The list is empty when the vocabulary is
        empty or when no hit scores above ``min_score``.
    """
    corpus = designer_data + category_data
    if not corpus:
        # Nothing to search against; avoid encoding an empty corpus.
        return []

    corpus_embeddings = _get_corpus_embeddings(corpus)
    query_embedding = model.encode(word, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_n)
    if not hits:
        return []
    return [corpus[hit["corpus_id"]] for hit in hits[0] if hit["score"] > min_score]
# Spelling correction against the known designer and category names.
def correct_spelling(word):
    """Return the closest known name for ``word``, or ``word`` unchanged.

    The single best ``fuzz.partial_ratio`` match across designers and
    categories replaces ``word`` only when its score is strictly above 70.
    """
    vocabulary = designer_data + category_data
    top_hits = process.extract(word, vocabulary, scorer=fuzz.partial_ratio, limit=1)
    if not top_hits:
        return word

    candidate, similarity, _index = top_hits[0]
    return candidate if similarity > 70 else word
# Autocomplete entry point, with safe handling of empty input and synonyms.
def autocomplete(query):
    """Suggest designers and categories for a partially typed query.

    The query is spell-corrected, then replaced by its closest semantic
    match when one is found, and the result is fuzzy-matched against the
    designer and category vocabularies separately.

    Args:
        query: Raw text. ``None`` or whitespace-only input is treated as
            empty.

    Returns:
        A 4-tuple ``(correction_status, synonym_status,
        designer_suggestions, category_suggestions)``. Each status is either
        ``"None"`` or ``"<before> → <after>"``; each suggestions value is a
        list of up to five names. Empty input yields
        ``("None", "None", [], [])``.
    """
    # Guard against None as well as blank strings before calling .strip().
    original_query = (query or "").strip()
    if not original_query:
        return "None", "None", [], []

    corrected_query = correct_spelling(original_query)
    synonym_results = find_synonym(corrected_query, top_n=1)
    synonym_query = synonym_results[0] if synonym_results else corrected_query

    # Fuzzy-match designers and categories separately.
    designer_matches = process.extract(
        synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5
    )
    category_matches = process.extract(
        synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5
    )
    designer_suggestions = [match[0] for match in designer_matches]
    category_suggestions = [match[0] for match in category_matches]

    # Right arrow, written as an escape so it cannot be mangled by a wrong
    # file encoding again (it had been mis-decoded into a Greek beta).
    arrow = "\u2192"
    correction_status = (
        f"{original_query} {arrow} {corrected_query}"
        if original_query != corrected_query
        else "None"
    )
    synonym_status = (
        f"{corrected_query} {arrow} {synonym_query}"
        if corrected_query != synonym_query
        else "None"
    )
    return correction_status, synonym_status, designer_suggestions, category_suggestions
# Gradio UI
def _render_autocomplete(text):
    """Run ``autocomplete`` and format its suggestion lists for Textbox output.

    The suggestion outputs are Textbox components, which display a string.
    Each suggestion list is therefore joined with newlines (one suggestion
    per line) instead of being shown as a Python list repr.
    """
    correction, synonym, designers, categories = autocomplete(text)
    return (
        correction,
        synonym,
        "\n".join(map(str, designers)),
        "\n".join(map(str, categories)),
    )


with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")

    # Input box; every change to its value re-runs the autocomplete.
    query = gr.Textbox(label="Start typing for autocomplete")

    # Read-only status fields describing what was done to the query.
    correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
    synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)

    # Read-only multi-line fields, one suggestion per line.
    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)

    query.change(
        fn=_render_autocomplete,
        inputs=query,
        outputs=[correction_output, synonym_output, designer_output, category_output],
    )

demo.launch(share=True)