| import pandas as pd | |
| import torch | |
| from pathlib import Path | |
def correct_spelling(ingredient, df):
    """Return the closest known INCI name for a possibly misspelled ingredient.

    The ingredient is compared case-insensitively against the string values
    of the ``'INCI name'`` column of ``df`` using
    ``difflib.get_close_matches``.

    Args:
        ingredient: Ingredient name to correct.
        df: DataFrame that has an ``'INCI name'`` column.

    Returns:
        The lower-cased best match when one reaches the 0.7 similarity
        cutoff; otherwise ``ingredient`` unchanged. Non-string input (for
        example ``None`` or a NaN cell) is returned as is.

    Raises:
        KeyError: If ``df`` has no ``'INCI name'`` column.
    """
    from difflib import get_close_matches

    # Nothing sensible to match for missing / non-text input; pass it through
    # instead of failing on ``.lower()``.
    if not isinstance(ingredient, str):
        return ingredient

    # Keep only real strings: NaN and stray numeric cells would otherwise end
    # up in the candidate list and make difflib raise a TypeError.
    choices = [name.lower() for name in df['INCI name'] if isinstance(name, str)]

    match = get_close_matches(ingredient.lower(), choices, n=1, cutoff=0.7)
    return match[0] if match else ingredient
def load_data(base_path=None):
    """Load the CosIng table, the brand product table and the saved embeddings.

    Args:
        base_path: Optional project root directory. It must contain a
            ``data`` sub-directory with the two CSV files and the
            ``product_embeddings.pt`` file. Defaults to the directory two
            levels above this source file.

    Returns:
        A ``(df_cosing, df_brand, product_embeddings)`` tuple.
        ``df_brand`` gets an extra ``'text'`` column made of the ``'name'``
        and ``'ingridients'`` columns joined by a single space.
        ``product_embeddings`` is whatever object is stored in
        ``product_embeddings.pt`` (presumably a tensor of per-product
        embeddings -- TODO confirm), mapped to CPU.
    """
    if base_path is None:
        base_path = Path(__file__).resolve().parent.parent
    else:
        base_path = Path(base_path)
    data_path = base_path / "data"

    df_cosing = pd.read_csv(data_path / "COSING_Cleaned_Normalized_v7(1).csv")
    df_brand = pd.read_csv(data_path / "brend_cleaned.csv")

    # Fill missing cells first: a bare ``astype(str)`` would turn NaN into the
    # literal text "nan" and leak it into the combined text.
    names = df_brand['name'].fillna("").astype(str)
    ingredients = df_brand['ingridients'].fillna("").astype(str)
    df_brand['text'] = names + " " + ingredients

    # NOTE: torch.load unpickles the file, so only load embedding files that
    # come from a trusted source.
    product_embeddings = torch.load(base_path / "product_embeddings.pt", map_location='cpu')
    return df_cosing, df_brand, product_embeddings