Spaces:

bacancydataprophets
/

Skin_Care_Product_Recommender

Build error

App Files Files Community

Skin_Care_Product_Recommender / app.py

dharak003

Upload 6 files

8a65e53 verified 11 months ago

raw

history blame contribute delete

4.39 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.manifold import TSNE
	from scipy.spatial.distance import cdist

	# ---- Page header ---------------------------------------------------------
	# Force the page title to render in white, then show the intro text.
	st.markdown('<style>h1{color: white;}</style>', unsafe_allow_html=True)
	st.title('Call on Doc Skin Care Product Recommender')
	st.write('Find the Right Skin Care for you')

	st.write("Hi there! If you have a skincare product you currently like I can help you find a similar one based on the ingredients.")

	st.write('Please select a product below so I can recommend similar ones')

	# ---- Data ----------------------------------------------------------------
	# The product catalogue; the code below reads its 'Label', 'Brand' and
	# 'Name' columns.
	df = pd.read_csv("./data/cosmetics.csv")

	# ---- Cascading selectors: category -> brand -> product --------------------
	# Each selectbox only offers the values present in the rows that survived
	# the previous choice.
	category_options = df['Label'].unique()
	category = st.selectbox(label='Select a product category', options=category_options)
	in_category = df['Label'] == category
	category_subset = df[in_category]

	brand_options = sorted(category_subset['Brand'].unique())
	brand = st.selectbox(label='Select a brand', options=brand_options)
	of_brand = category_subset['Brand'] == brand
	category_brand_subset = category_subset[of_brand]

	product_options = sorted(category_brand_subset['Name'].unique())
	product = st.selectbox(label='Select the product', options=product_options)

	# Skin-type selection is currently disabled:
	#skin_type = st.selectbox(label='Select your skin type', options= ['Combination',
	# 'Dry', 'Normal', 'Oily', 'Sensitive'] )

	## Helper functions
	# Define the oh_encoder function
	def oh_encoder(tokens, index=None, size=None):
	    """Encode a list of ingredient tokens as a binary indicator vector.

	    Parameters
	    ----------
	    tokens : iterable of str
	        Ingredient names to mark in the vector.
	    index : dict, optional
	        Mapping from ingredient name to its position in the vector.
	        When omitted, the module-level ``ingredient_idx`` is used, which
	        keeps the original ``oh_encoder(tokens)`` call working.
	    size : int, optional
	        Length of the returned vector. Defaults to ``len(index)`` when an
	        ``index`` is supplied, otherwise to the module-level ``N``.

	    Returns
	    -------
	    numpy.ndarray
	        1-D float array of length ``size`` holding 1 at the position of
	        every ingredient in ``tokens`` and 0 everywhere else.

	    Raises
	    ------
	    KeyError
	        If a token is not present in the index.
	    """
	    if index is None:
	        # Backward-compatible path: fall back to the script's globals.
	        index = ingredient_idx
	        if size is None:
	            size = N
	    elif size is None:
	        size = len(index)

	    x = np.zeros(size)
	    for ingredient in tokens:
	        # Repeated ingredients simply set the same slot to 1 again.
	        x[index[ingredient]] = 1
	    return x

	def closest_point(point, points):
	    """Return the entry of ``points`` that lies nearest to ``point``.

	    Distances are computed with ``cdist`` using its default metric.

	    Parameters
	    ----------
	    point : sequence of numbers
	        The query coordinates.
	    points : sequence of coordinate sequences
	        Candidates to search; must be indexable by position.

	    Returns
	    -------
	    The element of ``points`` with the smallest distance to ``point``.
	    """
	    # cdist expects 2-D inputs, so the single query point is wrapped in a
	    # list; the result is a 1 x len(points) distance matrix.
	    distances = cdist([point], points)
	    nearest = distances.argmin()
	    return points[nearest]


	# ---- Recommendation flow ---------------------------------------------------
	# Restrict the catalogue to the selected category.
	if category is not None:
	    category_subset = df[df['Label'] == category]

	if product is not None:
	    # NOTE: filtering by skin type is currently disabled:
	    #skincare_type = category_subset[category_subset[str(skin_type)] == 1]

	    # Reset the index so row labels are 0..M-1 and line up with the rows
	    # of the document-term matrix built below.
	    category_subset = category_subset.reset_index(drop=True)

	    # Tokenize every product's ingredient list and give each distinct
	    # ingredient a column index. `ingredient_idx` and `N` stay module-level
	    # because `oh_encoder` reads them.
	    ingredient_idx = {}
	    corpus = []
	    for ingredients in category_subset['Ingredients']:
	        tokens = ingredients.lower().split(', ')
	        corpus.append(tokens)
	        for ingredient in tokens:
	            # Only the first occurrence claims the next free column.
	            ingredient_idx.setdefault(ingredient, len(ingredient_idx))

	    # Number of products (rows) and distinct ingredients (columns).
	    M = len(category_subset)
	    N = len(ingredient_idx)

	    # Document-term matrix: A[r, c] is 1 when product r lists ingredient c.
	    A = np.zeros((M, N))
	    for row, tokens in enumerate(corpus):
	        A[row, :] = oh_encoder(tokens)

	    model_run = st.button('Find similar products!')

	    if model_run:
	        if M < 2:
	            # Nothing to compare against, and t-SNE cannot embed a single row.
	            st.warning('There are no other products in this category to compare with.')
	        else:
	            st.write('Based on the ingredients of the product you selected')
	            st.write('here are the top 10 products that are the most similar :sparkles:')

	            # Embed the ingredient vectors in 2-D. t-SNE requires the
	            # perplexity to be smaller than the number of samples, so the
	            # library default of 30 is capped for small categories.
	            model = TSNE(
	                n_components=2,
	                learning_rate=150,
	                random_state=42,
	                perplexity=min(30.0, M - 1),
	            )
	            tsne_features = model.fit_transform(A)

	            # Store the embedding coordinates next to the product data.
	            category_subset['X'] = tsne_features[:, 0]
	            category_subset['Y'] = tsne_features[:, 1]

	            # Locate the selected product. Names can repeat across brands,
	            # so narrow by the chosen brand when that still matches a row.
	            is_target = category_subset['Name'] == product
	            same_brand = is_target & (category_subset['Brand'] == brand)
	            if same_brand.any():
	                is_target = same_brand
	            target = category_subset[is_target]
	            target_row = target.index[0]
	            target_x = target['X'].values[0]
	            target_y = target['Y'].values[0]

	            # Euclidean distance from every product to the target, as
	            # plain floats so the column sorts reliably.
	            category_subset['distance'] = np.hypot(
	                category_subset['X'] - target_x,
	                category_subset['Y'] - target_y,
	            )

	            # Remove the selected product itself by its row label, then
	            # order the rest from nearest to farthest (ascending distance).
	            top_matches = category_subset.drop(index=target_row).sort_values(by=['distance'])

	            # Ingredients shared with the selected product.
	            target_ingredients = {
	                name.strip(' ') for name in target['Ingredients'].values[0].split(",")
	            }
	            top_matches['Ingredients in common'] = [
	                target_ingredients.intersection(
	                    name.strip(' ') for name in other_ingredients.split(",")
	                )
	                for other_ingredients in top_matches['Ingredients']
	            ]

	            # Select relevant columns; the 1-based index doubles as the rank.
	            top_matches = top_matches[['Label', 'Brand', 'Name', 'Price', 'Ingredients', 'Ingredients in common']]
	            top_matches.index = np.arange(1, len(top_matches) + 1)

	            st.dataframe(top_matches.head(10))