|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
import re |
|
|
|
|
|
class SimilarityRecommender:
    """Recommend, for each brand, the other brand most similar to it.

    The spreadsheet is read with its first column as the index (one row per
    brand); the remaining columns are passed as-is to ``cosine_similarity``.
    After construction, ``similarity_matrix`` is a pandas Series named
    ``best_similarity`` that maps each normalised brand name to the
    normalised name of its highest-scoring *other* brand.
    """

    # Everything except ASCII letters and spaces is stripped from brand names.
    _NON_BRAND_CHARS = re.compile(r'[^a-zA-Z ]')
    # 'ADIDAS' that is not already followed by the ' SB' suffix.  The
    # lookahead keeps an input such as 'Adidas SB' from being expanded twice
    # into 'ADIDAS SB SB'.
    _BARE_ADIDAS = re.compile(r'ADIDAS(?! SB\b)')

    def __init__(self, file):
        """Build the best-match lookup from an Excel sheet.

        Args:
            file: Anything ``pandas.read_excel`` accepts (path or file-like).
                Index labels are assumed to be strings.
        """
        data = pd.read_excel(file, index_col=0)
        brands = [self._normalize(brand) for brand in data.index]

        scores = pd.DataFrame(cosine_similarity(data), index=brands,
                              columns=brands)

        # A brand must never be recommended to itself: blank every cell whose
        # row label equals its column label (this also covers labels that
        # collapse to the same name after normalisation).
        labels = np.asarray(brands, dtype=object)
        same_brand = labels[:, None] == labels[None, :]
        scores = scores.mask(same_brand)

        self.similarity_matrix = scores.idxmax().rename('best_similarity')

    @classmethod
    def _normalize(cls, name):
        """Upper-case *name* and drop everything but letters and spaces."""
        return cls._NON_BRAND_CHARS.sub('', name.upper())

    def make_recommendation(self, item):
        """Return the brand most similar to *item*.

        Args:
            item: Brand name in any casing; digits and punctuation are
                ignored.  A bare ``ADIDAS`` is looked up as ``ADIDAS SB``.

        Returns:
            The entry of ``similarity_matrix`` stored under the normalised
            name.

        Raises:
            ValueError: If the normalised name is not a known brand.
        """
        key = self._normalize(item)
        key = self._BARE_ADIDAS.sub('ADIDAS SB', key)

        known_brands = self.similarity_matrix.index
        if key not in known_brands:
            raise ValueError(
                f'{key} not in matrix; known brands: {list(known_brands)}'
            )
        return self.similarity_matrix.loc[key]
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke run: build the recommender from the local workbook, then
    # show two lookups with the full best-match table printed in between.
    recommender = SimilarityRecommender("./TopBrands.xlsx")

    louis_vuitton_match = recommender.make_recommendation("louis vuitton-1")
    print(louis_vuitton_match)

    best_matches = recommender.similarity_matrix
    print(best_matches)

    lacoste_match = recommender.make_recommendation("Lacoste")
    print(lacoste_match)