"""Recommend the most similar brand using cosine similarity between sheet rows."""
import re

import numpy as np  # linear algebra
import pandas as pd  # data processing, spreadsheet I/O (pd.read_excel)
from sklearn.metrics.pairwise import cosine_similarity


class SimilarityRecommender:
    """Look up, for a given brand, the brand whose feature row is most similar.

    The spreadsheet is read with its first column as the index; every index
    label is treated as a brand name and every row as that brand's feature
    vector (the remaining columns must be numeric for ``cosine_similarity``
    to accept them).

    Attributes:
        similarity_matrix: despite the name, a ``pandas.Series`` named
            ``'best_similarity'`` that maps each normalised brand name to the
            normalised name of its most similar *other* brand.
    """

    # Anything that is not an ASCII letter or a space is dropped from names.
    _NON_NAME_CHARS = re.compile(r'[^a-zA-Z ]')
    # Bare "ADIDAS" is looked up as "ADIDAS SB". The negative lookahead keeps
    # an input that already says "ADIDAS SB" from becoming "ADIDAS SB SB".
    _ADIDAS_ALIAS = re.compile(r'ADIDAS(?! SB\b)')

    def __init__(self, file):
        """Build the best-match table from an Excel sheet.

        Args:
            file: anything ``pandas.read_excel`` accepts (path, URL or
                file-like object).
        """
        data = pd.read_excel(file, index_col=0)
        brands = [self._normalize(brand) for brand in data.index]

        similarity = cosine_similarity(data)
        # A brand is always maximally similar to itself; mask the diagonal so
        # idxmax() picks the closest *other* brand.
        np.fill_diagonal(similarity, np.nan)

        pairwise = pd.DataFrame(similarity, index=brands, columns=brands)
        self.similarity_matrix = pairwise.idxmax().rename('best_similarity')

    @classmethod
    def _normalize(cls, name):
        """Upper-case *name* and strip everything but ASCII letters and spaces."""
        return cls._NON_NAME_CHARS.sub('', name.upper())

    def make_recommendation(self, item):
        """Return the brand most similar to *item*.

        Args:
            item: brand name; case, digits and punctuation are ignored
                (``"louis vuitton-1"`` is looked up as ``"LOUIS VUITTON"``).

        Returns:
            The normalised name of the most similar brand.

        Raises:
            ValueError: if the normalised name is not a known brand.
        """
        key = self._ADIDAS_ALIAS.sub('ADIDAS SB', self._normalize(item))
        if key not in self.similarity_matrix.index:
            known = ', '.join(map(str, self.similarity_matrix.index))
            raise ValueError(f'{key} not in matrix; known brands: {known}')
        return self.similarity_matrix[key]


if __name__ == '__main__':
    rec = SimilarityRecommender("./TopBrands.xlsx")
    print(rec.make_recommendation("louis vuitton-1"))
    print(rec.similarity_matrix)
    print(rec.make_recommendation("Lacoste"))