Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> list:
    """Return the names of the five wines most similar to ``wine_name``.

    Similarity is the cosine similarity between rows of ``df``, computed
    over every column except ``"NAME"`` and ``"cluster"``.

    Args:
        wine_name: Value to look up in the ``"NAME"`` column.
        df: Frame with a ``"NAME"`` column, a ``"cluster"`` column and the
            feature columns used for the comparison.

    Returns:
        A list of up to five wine names, ordered from least to most
        similar (the best match is last). Rows named ``wine_name`` are
        never part of the result.

    Raises:
        ValueError: If no row of ``df`` is named ``wine_name``.
    """
    is_query = (df["NAME"] == wine_name).to_numpy()
    if not is_query.any():
        raise ValueError(f"wine {wine_name!r} not found in the 'NAME' column")

    features = df.drop(columns=["NAME", "cluster"])
    # Only the first matching row is scored; later duplicates of the same
    # name are ignored as a query but still excluded from the result.
    query_row = features[is_query].iloc[[0]]
    scores = cosine_similarity(query_row, features)[0]

    # Rank every row, then remove the query wine by position. Relying on it
    # being the single best match breaks when another wine ties with it.
    ranked = scores.argsort(kind="stable")
    ranked = ranked[~is_query[ranked]]
    top_5_indices = ranked[-5:]
    return df.iloc[top_5_indices]["NAME"].tolist()
def recommend_wine_from_users(df: pd.DataFrame, user: str, n=5):
    """Recommend wines the user has not rated, ranked by cluster average.

    Args:
        df: Frame indexed by user label, holding a ``'user'`` column, a
            ``'cluster'`` column and the rating columns. A value of 0 in
            the user's row is treated as "not rated".
        user: Index label of the user to recommend for.
        n: Maximum number of recommendations to return.

    Returns:
        A list of up to ``n`` column names, sorted by the mean rating
        within the user's cluster, highest first. The mean is taken over
        all rows of the cluster as stored, so zeros from other members
        count toward the average.
    """
    user_cluster = df.loc[user, 'cluster']
    user_ratings = df.loc[user].drop('cluster')
    user_unrated = user_ratings[user_ratings == 0].index

    # Build a new frame instead of dropping in place: the filtered rows are
    # a slice of ``df`` and mutating them raises SettingWithCopyWarning.
    cluster_users = df.loc[df['cluster'] == user_cluster].drop(
        columns=['cluster', 'user']
    )
    cluster_avg = cluster_users.mean()[user_unrated]
    return cluster_avg.sort_values(ascending=False).index[:n].tolist()
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str):
    """Return the cluster of the user whose ratings are closest to ``new_user``.

    Closeness is the mean squared difference between rating rows, ignoring
    NaN entries; the smallest value wins. Ties are resolved in favour of
    the row that appears first in ``df2``.

    Args:
        df2: Frame indexed by user label, holding a ``'user'`` column, a
            ``'cluster'`` column and the rating columns. It is not
            modified.
        new_user: Index label of the user to find a neighbour for.

    Returns:
        The ``'cluster'`` value of the closest other user, as an ``int``.

    Raises:
        IndexError: If ``df2`` contains no user other than ``new_user``.
    """
    ratings = df2.drop(columns=['cluster', 'user'])
    target = ratings.loc[new_user]

    # Row-wise mean of squared differences; ``mean`` skips NaN by default.
    distance = ((ratings - target) ** 2).mean(axis=1)

    # Remove the new user by label. Their distance to themselves is 0, but a
    # user with identical ratings ties with it, so position alone is unsafe.
    others = distance.drop(index=new_user)
    if others.empty:
        raise IndexError("no other user to compare the new user against")

    most_similar_user = others.idxmin()
    return int(df2.loc[most_similar_user, 'cluster'])