Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- .gitattributes +1 -0
- app.py +167 -0
- restaurants_dataframe_with_embeddings.csv +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
restaurants_dataframe_with_embeddings.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import time
|
| 5 |
+
import string
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import numpy as np
|
| 8 |
+
from transformers import BertTokenizer, BertModel
|
| 9 |
+
from collections import defaultdict, Counter
|
| 10 |
+
from tqdm.auto import tqdm
|
| 11 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 12 |
+
|
| 13 |
+
#Loading the model
|
| 14 |
+
@st.cache_resource
|
| 15 |
+
def get_models():
|
| 16 |
+
st.write('Loading the model...')
|
| 17 |
+
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
| 18 |
+
model = BertModel.from_pretrained("bert-base-uncased")
|
| 19 |
+
st.write("_The model is loaded and ready to use! :tada:_")
|
| 20 |
+
return model, tokenizer
|
| 21 |
+
|
| 22 |
+
#convert numpy arrays from strings back to arrays
|
| 23 |
+
def str_to_numpy(array_string):
|
| 24 |
+
array_string = array_string.replace('\n', '').replace('[','').replace(']','')
|
| 25 |
+
numpy_array = np.fromstring(array_string, sep=' ')
|
| 26 |
+
numpy_array = numpy_array.reshape((1, -1))
|
| 27 |
+
return numpy_array
|
| 28 |
+
|
| 29 |
+
@st.cache_data # 👈 Add the caching decorator
|
| 30 |
+
def load_data():
|
| 31 |
+
vectors_df = pd.read_csv('restaurants_dataframe_with_embeddings.csv')
|
| 32 |
+
embeds = dict(enumerate(vectors_df['Embeddings']))
|
| 33 |
+
rest_names = list(vectors_df['Names'])
|
| 34 |
+
return embeds, rest_names, vectors_df
|
| 35 |
+
|
| 36 |
+
#type: dict; keys: 0-n
|
| 37 |
+
restaurants_embeds, rest_names, df = load_data()
|
| 38 |
+
|
| 39 |
+
model, tokenizer = get_models()
|
| 40 |
+
|
| 41 |
+
#a function that takes a sentence and converts it into embeddings
|
| 42 |
+
def get_bert_embeddings(sentence, model, tokenizer):
|
| 43 |
+
inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
|
| 44 |
+
with torch.no_grad():
|
| 45 |
+
outputs = model(**inputs)
|
| 46 |
+
embeddings = outputs.last_hidden_state.mean(dim=1) # Average pool over tokens
|
| 47 |
+
return embeddings
|
| 48 |
+
|
| 49 |
+
# a function that return top-K best restaurants
|
| 50 |
+
def return_top_k(query, k=10):
|
| 51 |
+
embedded_query = get_bert_embeddings(query, model, tokenizer)
|
| 52 |
+
embedded_query = embedded_query.numpy()
|
| 53 |
+
|
| 54 |
+
top_similar = dict()
|
| 55 |
+
for i in range(len(restaurants_embeds)):
|
| 56 |
+
name = rest_names[i]
|
| 57 |
+
top_similar[i] = cosine_similarity(embedded_query, str_to_numpy(restaurants_embeds[i]))[0][0]
|
| 58 |
+
|
| 59 |
+
top_similar = dict(sorted(top_similar.items(), key=lambda item: item[1], reverse=True))
|
| 60 |
+
top_similar = dict([(key, value) for key, value in top_similar.items()][:k])
|
| 61 |
+
names = [rest_names[i] for i in top_similar.keys()]
|
| 62 |
+
result = dict(zip(names, top_similar.values()))
|
| 63 |
+
return result
|
| 64 |
+
|
| 65 |
+
#combines 2 users preferences into 1 string and fetches best options
|
| 66 |
+
def get_combined_preferences(user1, user2):
|
| 67 |
+
#TODO: optimize for more users
|
| 68 |
+
shared_pref = ''
|
| 69 |
+
for pref in user1:
|
| 70 |
+
shared_pref += pref
|
| 71 |
+
shared_pref += " "
|
| 72 |
+
shared_pref += " "
|
| 73 |
+
for pref in user2:
|
| 74 |
+
shared_pref += pref
|
| 75 |
+
shared_pref += " "
|
| 76 |
+
return shared_pref
|
| 77 |
+
|
| 78 |
+
if 'preferences_1' not in st.session_state:
|
| 79 |
+
st.session_state.preferences_1 = []
|
| 80 |
+
|
| 81 |
+
if 'preferences_2' not in st.session_state:
|
| 82 |
+
st.session_state.preferences_2 = []
|
| 83 |
+
|
| 84 |
+
if 'food' not in st.session_state:
|
| 85 |
+
st.session_state.food = ['Coffee', 'Italian', 'Mexican', 'Chinese', 'Indian', 'Asian', 'Fast food', 'Other']
|
| 86 |
+
|
| 87 |
+
if 'ambiance' not in st.session_state:
|
| 88 |
+
st.session_state.ambiance = ['Romantic date', 'Friends catching up', 'Family gathering', 'Big group', 'Business-meeting', 'Other']
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
if 'price' not in st.session_state:
|
| 92 |
+
st.session_state.price = dict(enumerate(['$', '$$', '$$$', '$$$$']))
|
| 93 |
+
|
| 94 |
+
# Configure Streamlit page and state
|
| 95 |
+
st.title("GoTogether!")
|
| 96 |
+
st.markdown(
|
| 97 |
+
"Tell us about your preferences!")
|
| 98 |
+
st.caption("In section 'Others', you can describe any wishes.")
|
| 99 |
+
|
| 100 |
+
st.write('User 1')
|
| 101 |
+
|
| 102 |
+
food_1 = st.selectbox('Select the food type you prefer', st.session_state.food, key=1)
|
| 103 |
+
if food_1 == 'Other':
|
| 104 |
+
food_1 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=10)
|
| 105 |
+
|
| 106 |
+
st.session_state.preferences_1.append(food_1)
|
| 107 |
+
|
| 108 |
+
ambiance_1 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=2)
|
| 109 |
+
if ambiance_1 == 'Other':
|
| 110 |
+
ambiance_1 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=11)
|
| 111 |
+
|
| 112 |
+
price_1 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=3)
|
| 113 |
+
|
| 114 |
+
st.session_state.preferences_1.append(ambiance_1)
|
| 115 |
+
|
| 116 |
+
st.write('User 2')
|
| 117 |
+
|
| 118 |
+
food_2 = st.selectbox('Select the food type you prefer', st.session_state.food, key=4)
|
| 119 |
+
if food_2 == 'Other':
|
| 120 |
+
food_2 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=13)
|
| 121 |
+
|
| 122 |
+
st.session_state.preferences_2.append(food_2)
|
| 123 |
+
|
| 124 |
+
ambiance_2 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=5)
|
| 125 |
+
if ambiance_2 == 'Other':
|
| 126 |
+
ambiance_2 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=12)
|
| 127 |
+
|
| 128 |
+
price_2 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=6)
|
| 129 |
+
|
| 130 |
+
st.session_state.preferences_2.append(ambiance_2)
|
| 131 |
+
|
| 132 |
+
submit = st.button("Submit")
|
| 133 |
+
if submit:
|
| 134 |
+
with st.spinner("Please wait while we are finding the best solution..."):
|
| 135 |
+
query = get_combined_preferences(st.session_state.preferences_1, st.session_state.preferences_2)
|
| 136 |
+
st.write("Your query is:", query)
|
| 137 |
+
results = return_top_k(query, k=10)
|
| 138 |
+
st.write("Here are the best matches to your preferences:")
|
| 139 |
+
i = 1
|
| 140 |
+
for name, score in results.items():
|
| 141 |
+
st.write("Top", i, ':', name, score)
|
| 142 |
+
condition = df['Names'] == name
|
| 143 |
+
# Use the condition to extract the value(s)
|
| 144 |
+
description = df.loc[condition, 'Strings']
|
| 145 |
+
st.write(description)
|
| 146 |
+
i+=1
|
| 147 |
+
|
| 148 |
+
st.session_state.preferences_1, st.session_state.preferences_2 = [], []
|
| 149 |
+
|
| 150 |
+
#TODO: include rating and price as variables
|
| 151 |
+
|
| 152 |
+
# if input:
|
| 153 |
+
# input_embed = model.encode(input)
|
| 154 |
+
# sim_score = similarity_top(input_embed, icd_embeddings)
|
| 155 |
+
# i = 1
|
| 156 |
+
# for dis, value in sim_score:
|
| 157 |
+
# st.write(f":green[Prediction number] {i}:")
|
| 158 |
+
# st.write(f"{dis} (similarity score:", value, ")")
|
| 159 |
+
# i+= 1
|
| 160 |
+
|
| 161 |
+
# text_spinner_placeholder = st.empty()
|
| 162 |
+
# with st.spinner("Please wait while your visualizations are being generated..."):
|
| 163 |
+
# time.sleep(5)
|
| 164 |
+
# vis_results_2d(input_embed)
|
| 165 |
+
# vis_results_3d(input_embed)
|
| 166 |
+
|
| 167 |
+
# #TODO: implement price range as a sliding bar
|
restaurants_dataframe_with_embeddings.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31926e0abc4ff33c12761b0cc1d2b2f855bb097463e72a2dccbe9f2f2df3cf70
|
| 3 |
+
size 19014982
|