File size: 814 Bytes
030432c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import pandas as pd
import torch
from pathlib import Path

def correct_spelling(ingredient, df):
    """Return the closest known INCI name for *ingredient*, if there is one.

    The lookup is case-insensitive: both the query and the reference names
    are lower-cased before fuzzy matching with ``difflib.get_close_matches``.

    Args:
        ingredient: Ingredient name to look up (a string).
        df: DataFrame with an ``'INCI name'`` column holding reference names.

    Returns:
        The best-matching reference name, lower-cased, when its similarity
        to the query is at least 0.7; otherwise *ingredient* unchanged
        (original casing preserved).
    """
    from difflib import get_close_matches

    # Cast to str after dropping nulls: on a mixed-type column ``.str.lower()``
    # yields NaN for non-string cells, and difflib then fails on ``len(nan)``.
    known_names = df['INCI name'].dropna().astype(str).str.lower().tolist()
    match = get_close_matches(ingredient.lower(), known_names, n=1, cutoff=0.7)
    return match[0] if match else ingredient

def load_data():
    """Load the CosIng table, the brand table and the product embeddings.

    Paths are resolved relative to the parent of the directory containing
    this file: the CSV files are read from its ``data`` sub-directory and
    ``product_embeddings.pt`` from that parent directory itself.

    Returns:
        A tuple ``(df_cosing, df_brand, product_embeddings)``. ``df_brand``
        gains a ``'text'`` column made of the ``'name'`` and
        ``'ingridients'`` columns (both cast to ``str``) joined by a single
        space. The embeddings are loaded with ``map_location='cpu'``.
    """
    project_root = Path(__file__).resolve().parents[1]
    csv_dir = project_root / "data"

    cosing_table = pd.read_csv(csv_dir / "COSING_Cleaned_Normalized_v7(1).csv")
    brand_table = pd.read_csv(csv_dir / "brend_cleaned.csv")

    # Combine product name and ingredient list into one text field per row.
    names = brand_table['name'].astype(str)
    ingredient_lists = brand_table['ingridients'].astype(str)
    brand_table['text'] = names + " " + ingredient_lists

    embeddings = torch.load(project_root / "product_embeddings.pt", map_location='cpu')

    return cosing_table, brand_table, embeddings