from difflib import get_close_matches
from pathlib import Path

import pandas as pd
import torch


def correct_spelling(ingredient, df, cutoff=0.7):
    """Return the closest known INCI name for *ingredient*.

    The candidates are the lower-cased, non-null values of ``df['INCI name']``.
    Matching is done with ``difflib.get_close_matches`` on the lower-cased
    input, so a successful match is always returned in lower case.

    Args:
        ingredient: Ingredient name as typed by the user.
        df: DataFrame that contains an ``'INCI name'`` column.
        cutoff: Minimum similarity ratio (0..1) a candidate must reach to be
            accepted. Defaults to 0.7.

    Returns:
        The best-matching lower-cased INCI name, or *ingredient* unchanged
        when it is empty, not a string, or has no candidate above *cutoff*.
    """
    # Nothing sensible to match against; hand the value back untouched
    # instead of failing on ``.lower()``.
    if not isinstance(ingredient, str) or not ingredient:
        return ingredient

    # Coerce to str before lower-casing: ``.str.lower()`` turns non-string
    # cells into NaN, which difflib cannot compare. Duplicates are dropped
    # because they cannot change which single best match is returned.
    choices = (
        df['INCI name']
        .dropna()
        .astype(str)
        .str.lower()
        .drop_duplicates()
        .tolist()
    )

    match = get_close_matches(ingredient.lower(), choices, n=1, cutoff=cutoff)
    return match[0] if match else ingredient


def load_data():
    """Load the CosIng table, the brand table and the product embeddings.

    Paths are resolved relative to the directory two levels above this file:
    the CSV files are read from its ``data`` sub-directory and
    ``product_embeddings.pt`` from the directory itself.

    Returns:
        A ``(df_cosing, df_brand, product_embeddings)`` tuple. ``df_brand``
        gets an extra ``'text'`` column holding the product name and its
        ingredient list joined by a single space.
    """
    base_path = Path(__file__).resolve().parent.parent
    data_path = base_path / "data"

    df_cosing = pd.read_csv(data_path / "COSING_Cleaned_Normalized_v7(1).csv")
    df_brand = pd.read_csv(data_path / "brend_cleaned.csv")

    # NOTE(review): ``astype(str)`` renders missing values as the literal
    # text "nan"; confirm that is acceptable for whatever consumes 'text'.
    df_brand['text'] = (
        df_brand['name'].astype(str) + " " + df_brand['ingridients'].astype(str)
    )

    # SECURITY: torch.load unpickles the file, so it must only ever be
    # pointed at a trusted, locally produced artifact. map_location='cpu'
    # keeps loading independent of the device the file was saved from.
    product_embeddings = torch.load(
        base_path / "product_embeddings.pt", map_location='cpu'
    )

    return df_cosing, df_brand, product_embeddings