Spaces:
Sleeping
Sleeping
Shraddha Gami committed on
Commit ·
138c021
1
Parent(s): 61b4182
Add personalised reco
Browse files- app.py +162 -0
- gofynd_old_model.model/fingerprint.pb +1 -0
- gofynd_old_model.model/saved_model.pb +0 -0
- gofynd_old_model.model/variables/variables.data-00000-of-00001 +0 -0
- gofynd_old_model.model/variables/variables.index +0 -0
- grouping_data.py +24 -0
- html_information.py +67 -0
- initial_sessions_fynd_pickle_filename.pkl +0 -0
- new_events_data.csv +0 -0
- requirements.txt +9 -0
- uid_name_map.json +0 -0
- uid_url_map.json +0 -0
- url_mapper.py +53 -0
- user_id_list.pkl +0 -0
- user_product_dict.pkl +0 -0
- users_with_multiple_sessions_filename.pkl +0 -0
app.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Text
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pickle
|
| 6 |
+
import numpy as np
|
| 7 |
+
import tensorflow as tf
|
| 8 |
+
import tensorflow_recommenders as tfrs
|
| 9 |
+
import streamlit as st
|
| 10 |
+
from html_information import html
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import json
|
| 15 |
+
|
def read_json(file_name):
    """Load and return the JSON document stored at *file_name*."""
    with open(file_name) as fh:
        return json.load(fh)
# Lookup tables mapping product uid -> display name and product uid -> image
# URL, loaded once at import time. Presumably produced by url_mapper.py, in
# which case keys are JSON strings — TODO confirm key type matches the ids
# used for lookups further down.
uid_name_map = read_json('uid_name_map.json')
uid_url_map = read_json('uid_url_map.json')


# Streamlit requires page configuration before any other st.* call.
st.set_page_config(page_title="My App", page_icon=":guardsman:", layout="wide", initial_sidebar_state="auto")
@st.cache_resource
def load_model(path):
    """Load the SavedModel at *path*; cached across Streamlit reruns."""
    return tf.saved_model.load(path)
def inference(model, user_id):
    """Query *model* for *user_id* and return the top-15 product ids as ints.

    The model is expected to return a (scores, titles) pair where each title
    exposes .numpy() yielding the utf-8 encoded id bytes.
    """
    _scores, titles = model([user_id])
    return [int(title.numpy().decode('utf-8')) for title in titles[0, :15]]
def read_pickle_file(file_path):
    """Deserialize and return the pickled object stored at *file_path*.

    NOTE: pickle is unsafe on untrusted input; only use with trusted,
    locally produced artifacts.
    """
    with open(file_path, 'rb') as fh:
        return pickle.load(fh)
def streamlit_carousel(header_name: str, rec_item_url: list,
                       rec_item_name: list) -> None:
    """Render a horizontal product carousel.

    Writes a header, then one card per (image URL, product name) pair, using
    the open-ended CSS/HTML scaffolding from html_information and closing the
    remaining tags at the end.
    """
    st.header(header_name)
    cards = []
    for idx, url in enumerate(rec_item_url):
        cards.append("""<div class="item"><div id="image-container"><img src='""" + str(url) + """' /></div><p>""" + str(rec_item_name[idx]) + """</p></div>""")
    st.markdown(html + "".join(cards) + """</div></div></body>""", unsafe_allow_html=True)
def recall_at_k(ground_truth, recommended, k):
    """Calculate Recall@k.

    Parameters:
    - ground_truth (list): List of ground truth product IDs.
    - recommended (list): List of recommended product IDs.
    - k (int): Number of recommendations to consider.

    Returns:
    - recall (float): fraction of distinct ground-truth items that appear in
      the top-k recommendations; 0.0 when ground_truth is empty.
    """
    relevant = set(ground_truth)
    if not relevant:
        # No relevant items: recall is defined as 0.0 here.
        return 0.0
    top_k = set(recommended[:k])
    return len(top_k & relevant) / len(relevant)
# ---------------------------------------------------------------------------
# Script body: load the model and evaluation artifacts, then build the
# Streamlit UI (user picker, recall score, and three carousel tabs).
# ---------------------------------------------------------------------------
model_weights_name = 'gofynd_old_model.model'
k = 15  # number of recommendations to retrieve / evaluate against
print("######## Running ########")
print(f"model_weights_name: {model_weights_name}")
print('########')
print()
loaded = load_model(model_weights_name)
print("######### Model Loaded #########")


# uid_name_map = read_pickle_file('new_uid_name_map.pkl')
# uid_url_map = read_pickle_file('new_uid_url_map.pkl')
# uid_url_map =
user_product_dict = read_pickle_file('user_product_dict.pkl')
# NOTE(review): 'final_sessions_fynd_pickle_filename.pkl' does not appear in
# this commit's file list (only 'initial_sessions_...' does) — confirm the
# file is actually deployed alongside the app.
last_session_user_product_dict = read_pickle_file('final_sessions_fynd_pickle_filename.pkl')
user_with_multiple_sessions = read_pickle_file('users_with_multiple_sessions_filename.pkl')
initial_sessions_user_product_dict = read_pickle_file('initial_sessions_fynd_pickle_filename.pkl')
# avg_recall = read_pickle_file('Personalised_two_tower_fynd_recall.pkl')
# positive_recall = read_pickle_file("Personalised_two_twoer_fynd_positive_recall.pkl")
# total_count = read_pickle_file("Personalised_two_twoer_fynd_total_count.pkl")
# average_positive_recall = read_pickle_file("Personalised_two_twoer_fynd_average_positive_recall.pkl")
user_id_list = user_with_multiple_sessions

# # st.set_page_config(page_title="My App", page_icon=":guardsman:", layout="wide", initial_sidebar_state="auto")
# st.header("Personalised Product Recommendations (Fynd)")
# st.subheader("Training Metrics")
# st.write(f"Average Recall@{k} on Test Set: {avg_recall}")
# st.write(f"Total Users Count: {total_count}")
# st.write(f"Users with Positive Recall@{k} on Test Set: {positive_recall}")
# st.write(f"% Users with Positive Recall@{k} on Test Set: {average_positive_recall}")
# col1, col2 = st.tabs(["Training & Test Loss", "Top 10 Test Accuracy"])
# with col1:
#     st.image('Personalised_two_tower_fynd_loss_graph.png')
# with col2:
#     st.image('Personalised_two_tower_fynd_top_10_accuracy_graph.png')

st.header("Personalised Product Recommendations")
st.write("Model trained with Clickstream data of GoFynd.com")
st.subheader("Choose a User")
# selectbox returns the selected *position*; format_func shows the user id.
index = st.selectbox("User List", range(len(user_id_list)), format_func=lambda x: user_id_list[x])
user_id = user_id_list[index]
print(f"User ID: {user_id}")

# Flatten the user's held-out (last) session data into a single product list.
user_final_session = last_session_user_product_dict[user_id]
final_session_product_list = []
for all_session in user_final_session:
    for session in all_session:
        final_session_product_list.append(session['product_id'])

rec_list = inference(loaded, str(user_id))
print(f"Final Session Product List: {final_session_product_list}")
print(f"Recommendation List: {rec_list}")
# Evaluate the recommendations against the held-out session.
recall_value = recall_at_k(final_session_product_list, rec_list, k)
print(f"Recall@{k}: {recall_value}")
st.write(f"Recommendation Score: {recall_value}")

initial_sessions = initial_sessions_user_product_dict[user_id]

tab1, tab2, tab3 = st.tabs(["Recommendations", "Test session data", "Train session data"])
with tab1:
    # NOTE(review): inference() returns ints while JSON-loaded maps normally
    # have string keys — confirm the key types actually match at runtime.
    rec_list_name = [uid_name_map[product_id] for product_id in rec_list]
    rec_list_url = [uid_url_map[product_id] for product_id in rec_list]
    streamlit_carousel("Top 15 Personalised Product Recommendation", rec_list_url, rec_list_name)
with tab2:
    product_name_list = [uid_name_map[product_id] for product_id in final_session_product_list]
    product_url_list = [uid_url_map[product_id] for product_id in final_session_product_list]
    streamlit_carousel("User's Test Last Session Viewed Products", product_url_list, product_name_list)
with tab3:
    # One carousel per historical (training) session.
    i=1
    for session in initial_sessions:
        temp_product_list = []
        for row in session:
            temp_product_list.append(row['product_id'])
        product_name_list = [uid_name_map[product_id] for product_id in temp_product_list]
        product_url_list = [uid_url_map[product_id] for product_id in temp_product_list]
        streamlit_carousel("Session "+str(i), product_url_list, product_name_list)
        i+=1
gofynd_old_model.model/fingerprint.pb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
����ニ�,ꓼ�ߍ������� م������(ȟ�궷�i2
|
gofynd_old_model.model/saved_model.pb
ADDED
|
Binary file (57.5 kB). View file
|
|
|
gofynd_old_model.model/variables/variables.data-00000-of-00001
ADDED
|
Binary file (123 kB). View file
|
|
|
gofynd_old_model.model/variables/variables.index
ADDED
|
Binary file (340 Bytes). View file
|
|
|
grouping_data.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd

# Clickstream events; assumes one row per (user_id, product_id) interaction
# — TODO confirm against the CSV schema.
df = pd.read_csv('new_events_data.csv')
# Collapse the event log into one row per user holding the list of
# product_ids that user interacted with.
grouped_data = df.groupby('user_id')['product_id'].agg(list).reset_index()

# user_id -> [product_id, ...]
user_product_dict = dict(zip(grouped_data['user_id'], grouped_data['product_id']))

# Debug: print a single sample entry to eyeball the structure.
for key, value in user_product_dict.items():
    print(key, value)
    break

user_id_list = list(user_product_dict.keys())
print(len(user_id_list))
print(len(user_product_dict))

import pickle
def save_obj(obj, name):
    """Serialize *obj* with pickle and write it to the path *name*."""
    with open(name, 'wb') as out_file:
        pickle.dump(obj, out_file)
# Persist the artifacts consumed by app.py.
save_obj(user_product_dict, 'user_product_dict.pkl')
save_obj(user_id_list, 'user_id_list.pkl')
html_information.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Static HTML/CSS scaffolding for the product carousel rendered through
# st.markdown in app.py. The string is intentionally left *open*: callers
# append one <div class="item"> card per product and then close the
# remaining </div></div></body> tags themselves (see streamlit_carousel).
# NOTE(review): the fragment emits a stray </head> with no matching <head>;
# browsers tolerate it, but confirm this is intentional.
html = """
<style>
body{
font-family: sans-serif;
}
h1{
font-weight: 100;
}
.container{
width: 100%;
display:block;
overflow:hidden;
}
.carousel{
display:block;
width: 100%;
height: 320px;
background: white;
overflow-x: scroll;
padding: 10px;
margin: 0;
white-space: nowrap;
border-top: 2px solid rgba(0, 0, 0, 0.1);
border-bottom: 2px solid rgba(0, 0, 0, 0.1);
}
.item {
display: inline-block;
overflow: hidden;
width: 250px;
margin: 0 10px;
height: calc(100%);
background: rgba(0, 0, 0, 0.05) no-repeat center center;
background-size: cover;
position:relative;
border-radius: 20px;
box-shadow: 0 0 10px #dfdfdf;
}
.item p {
padding: 20px;
word-break: break-all;
white-space: break-spaces;
overflow: hidden;
display: -webkit-box;
-webkit-line-clamp: 4;
-webkit-box-orient: vertical;
margin: 0;
align: center;
}
#image-container{
width: 100%;
height: 60%;
text-align:center;
font-size: 9em;
color: white;
overflow: hidden;
}
#image-container img{
width: 100%;
height: 100%;
object-fit: contain;
}
</style>
</head>
<body>
<div class="container">
<div class="carousel">
"""
initial_sessions_fynd_pickle_filename.pkl
ADDED
|
Binary file (71.8 kB). View file
|
|
|
new_events_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
keras==2.15.0
|
| 2 |
+
tensorboard==2.15.1
|
| 3 |
+
tensorboard-data-server==0.7.2
|
| 4 |
+
tensorflow==2.15.0
|
| 5 |
+
tensorflow-estimator==2.15.0
|
| 6 |
+
tensorflow-io-gcs-filesystem==0.34.0
|
| 7 |
+
tensorflow-recommenders==0.7.3
|
| 8 |
+
gcsfs
|
| 9 |
+
streamlit
|
uid_name_map.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
uid_url_map.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
url_mapper.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import json
|
| 3 |
+
|
def read_json(file_name):
    """Parse the JSON file at *file_name* and return its contents."""
    with open(file_name) as handle:
        return json.load(handle)
# Build uid -> product-name and uid -> image-URL maps from the catalog dump
# and persist them as JSON for the Streamlit app.
catalog_data = read_json('catalog.json')

# Debug: inspect the first catalog entry's structure.
inst = catalog_data[0]
print("Catalog data")
print(inst)

for key, value in inst.items():
    print("Key: ", key)
    print("Value: ", value)
    print()




# Debug: spot-check the two fields extracted below on the sample entry.
for key, value in inst.items():
    if key == "medias":
        print(value[0]['url'])
    if key == 'name':
        print(value)

uid_name_map = {}
uid_url_map = {}

# For every catalog entry keep the first media URL and the product name,
# keyed by the entry's uid.
for inst in catalog_data:
    for key, value in inst.items():
        if key == "medias":
            uid_url_map[inst['uid']] = value[0]['url']
        if key == 'name':
            uid_name_map[inst['uid']] = value

print(len(uid_name_map))
print(len(uid_url_map))
with open('uid_name_map.json', 'w') as json_file:
    json.dump(uid_name_map, json_file, indent=4)
with open('uid_url_map.json', 'w') as json_file:
    json.dump(uid_url_map, json_file, indent=4)

# import pickle

# def save_obj(obj, name):
#     with open(name, 'wb') as f:
#         pickle.dump(obj, f)

# save_obj(uid_name_map, 'new_uid_name_map.pkl')
# save_obj(uid_url_map, 'new_uid_url_map.pkl')
user_id_list.pkl
ADDED
|
Binary file (1.96 kB). View file
|
|
|
user_product_dict.pkl
ADDED
|
Binary file (7.79 kB). View file
|
|
|
users_with_multiple_sessions_filename.pkl
ADDED
|
Binary file (5.91 kB). View file
|
|
|