File size: 814 Bytes
030432c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import pandas as pd
import torch
from pathlib import Path
def correct_spelling(ingredient, df):
    """Return the closest known INCI name for *ingredient*.

    The comparison is case-insensitive: both the query and the values of
    ``df['INCI name']`` are lowercased before fuzzy matching, so a
    successful match is returned in lowercase.

    Args:
        ingredient: Ingredient name to correct. Values that are not
            strings (e.g. ``None`` or a NaN read from a DataFrame cell)
            are returned unchanged.
        df: DataFrame with an ``'INCI name'`` column holding the
            reference names.

    Returns:
        The best-matching lowercased reference name when one reaches the
        0.7 similarity cutoff, otherwise *ingredient* as passed in.
    """
    from difflib import get_close_matches

    # Nothing sensible to match against; treat it like "no match found".
    if not isinstance(ingredient, str):
        return ingredient

    # Keep only real strings: a mixed-type column would otherwise leak
    # non-string entries into the candidates and make difflib raise
    # TypeError when it takes their length.
    choices = [
        name.lower()
        for name in df['INCI name'].dropna()
        if isinstance(name, str)
    ]
    match = get_close_matches(ingredient.lower(), choices, n=1, cutoff=0.7)
    return match[0] if match else ingredient
def load_data():
    """Load the CosIng table, the brand table and the product embeddings.

    Paths are resolved relative to the directory two levels above this
    file: the CSV files are read from its ``data`` subdirectory and the
    embeddings from ``product_embeddings.pt`` directly inside it.

    Returns:
        A ``(df_cosing, df_brand, product_embeddings)`` tuple. ``df_brand``
        gains a ``'text'`` column made of the ``'name'`` and
        ``'ingridients'`` columns joined by a single space.
    """
    project_root = Path(__file__).resolve().parent.parent
    data_dir = project_root / "data"

    cosing_csv = data_dir / "COSING_Cleaned_Normalized_v7(1).csv"
    brand_csv = data_dir / "brend_cleaned.csv"
    embeddings_file = project_root / "product_embeddings.pt"

    df_cosing = pd.read_csv(cosing_csv)
    df_brand = pd.read_csv(brand_csv)

    # Both parts are cast to str first so the concatenation cannot fail
    # on non-string cells.
    product_names = df_brand['name'].astype(str)
    ingredient_lists = df_brand['ingridients'].astype(str)
    df_brand['text'] = product_names + " " + ingredient_lists

    # map_location='cpu' is passed so the load does not depend on the
    # device the embeddings were saved from.
    # NOTE(review): torch.load may unpickle arbitrary objects depending on
    # the torch version's `weights_only` default - only load trusted files.
    product_embeddings = torch.load(embeddings_file, map_location='cpu')

    return df_cosing, df_brand, product_embeddings
|