| import pandas as pd |
| import torch |
| from pathlib import Path |
|
|
def correct_spelling(ingredient, df):
    """Return the closest known INCI name for *ingredient*, if there is one.

    The ingredient is compared case-insensitively against the values of the
    ``'INCI name'`` column of *df* using ``difflib.get_close_matches`` with a
    similarity cutoff of 0.7.

    Args:
        ingredient: The ingredient name to correct. Non-string values
            (e.g. ``None`` or NaN from a missing cell) are returned unchanged.
        df: A DataFrame that has an ``'INCI name'`` column.

    Returns:
        The best match in lowercase when one reaches the cutoff; otherwise
        *ingredient* exactly as it was passed in.

    Raises:
        KeyError: If *df* has no ``'INCI name'`` column.
    """
    from difflib import get_close_matches

    # Nothing sensible can be matched against a missing/non-text value, and
    # calling .lower() on it would raise AttributeError.
    if not isinstance(ingredient, str):
        return ingredient

    # Keep only real strings: this drops missing values and also any stray
    # non-text cells, which would otherwise make get_close_matches fail.
    choices = [name.lower() for name in df['INCI name'] if isinstance(name, str)]

    match = get_close_matches(ingredient.lower(), choices, n=1, cutoff=0.7)
    return match[0] if match else ingredient
|
|
def load_data():
    """Load the CosIng table, the brand table and the product embeddings.

    Paths are resolved relative to the directory two levels above this
    file: the CSV files are read from its ``data`` sub-directory and the
    embeddings file from the directory itself.

    A ``'text'`` column is added to the brand table by joining the
    string forms of its ``'name'`` and ``'ingridients'`` columns with a
    single space.

    Returns:
        A tuple ``(df_cosing, df_brand, product_embeddings)`` where the first
        two items are DataFrames and the third is whatever object
        ``torch.load`` deserializes from ``product_embeddings.pt``, mapped to
        the CPU.
    """
    module_dir = Path(__file__).resolve().parent
    project_root = module_dir.parent
    data_dir = project_root / "data"

    df_cosing = pd.read_csv(data_dir / "COSING_Cleaned_Normalized_v7(1).csv")
    df_brand = pd.read_csv(data_dir / "brend_cleaned.csv")

    # Combined "<name> <ingredients>" text per product row.
    product_names = df_brand['name'].astype(str)
    product_ingredients = df_brand['ingridients'].astype(str)
    df_brand['text'] = product_names + " " + product_ingredients

    # NOTE(review): torch.load unpickles the file; only load trusted files.
    embeddings_file = project_root / "product_embeddings.pt"
    product_embeddings = torch.load(embeddings_file, map_location='cpu')

    return df_cosing, df_brand, product_embeddings
|
|