knguyen471 committed on
Commit
78f14ab
·
verified ·
1 Parent(s): 83dc914

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -153
app.py CHANGED
@@ -9,23 +9,12 @@ import os
9
  sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'utils'))
10
  from clean_text import clean_text
11
  from semantic_similarity import Encoder
12
- from ranker import compute_bayesian_popularity_score
13
  from main import get_recommendations
14
 
15
  print("Loading restaurant data...")
16
  data = pd.read_csv("data/toy_data_aggregated_embeddings.csv")
17
  print(f"Loaded {len(data)} restaurants")
18
 
19
- # # Compute Bayesian popularity scores
20
- # print("Computing popularity scores...")
21
- # data = compute_bayesian_popularity_score(data)
22
- # print("Popularity scores computed")
23
-
24
- # Load embeddings
25
- print("Loading pre-computed embeddings...")
26
- all_desc_embeddings = np.vstack(data["embedding"].values)
27
- print(f"Loaded embeddings with shape {all_desc_embeddings.shape}")
28
-
29
  # Initialize semantic encoder
30
  print("Loading semantic encoder model...")
31
  try:
@@ -65,135 +54,9 @@ def create_paris_map(results_df):
65
 
66
  return m._repr_html_()
67
 
68
- # def semantic_search(query, data_source, num_results, use_popularity):
69
- # """Semantic search using embeddings"""
70
- # if not query.strip():
71
- # return "Please enter a search query", None
72
-
73
- # try:
74
- # query_clean = clean_text(query)
75
-
76
- # # Generate query embedding
77
- # print(f"Encoding query: {query_clean}")
78
- # query_embedding = encoder.encode([query_clean], show_progress_bar=False)
79
- # query_embedding = query_embedding.cpu().numpy()
80
-
81
- # # Compute semantic similarity
82
- # similarities = cosine_similarity(query_embedding, all_desc_embeddings)[0]
83
-
84
- # # Combine with popularity if requested
85
- # if use_popularity:
86
- # sim_normalized = (similarities - similarities.min()) / (similarities.max() - similarities.min() + 1e-10)
87
- # pop_normalized = (data["pop_score"] - data["pop_score"].min()) / (data["pop_score"].max() - data["pop_score"].min() + 1e-10)
88
- # # Combined score: 70% semantic, 30% popularity
89
- # scores = 0.7 * sim_normalized + 0.3 * pop_normalized
90
- # else:
91
- # scores = similarities
92
-
93
- # top_indices = np.argsort(scores)[-int(num_results):][::-1]
94
- # results = data.iloc[top_indices].copy()
95
- # results['similarity_score'] = scores[top_indices]
96
-
97
- # map_html = create_paris_map(results)
98
-
99
- # output = f"Found {len(results)} restaurants for '{query}'\n"
100
- # output += f"Data Source: {data_source}\n"
101
- # output += f"Search Method: Semantic Search {'+ Popularity' if use_popularity else ''}\n\n"
102
-
103
- # for idx, (_, row) in enumerate(results.iterrows(), 1):
104
- # name = row.get('name', 'Unknown')
105
- # rating = row.get('overall_rating', 'N/A')
106
- # reviews = row.get('review_count', 'N/A')
107
- # similarity = row.get('similarity_score', 0)
108
- # pop_score = row.get('pop_score', 0)
109
-
110
- # output += f"{idx}. **{name}**\n"
111
- # output += f" Rating: {rating} | Reviews: {reviews}\n"
112
- # output += f" Match: {similarity:.3f}"
113
- # if use_popularity:
114
- # output += f" | Popularity: {pop_score:.2f}"
115
- # output += "\n"
116
-
117
- # if 'address' in row and pd.notna(row['address']):
118
- # addr = str(row['address'])[:100]
119
- # output += f" Address: {addr}\n"
120
-
121
- # output += "\n"
122
-
123
- # return output, map_html
124
-
125
- # except Exception as e:
126
- # import traceback
127
- # return f"Error: {str(e)}\n\n{traceback.format_exc()}", None
128
-
129
- # def keyword_search(query, data_source, num_results, use_popularity):
130
- # """Keyword-based search with optional popularity ranking"""
131
- # if not query.strip():
132
- # return "Please enter a search query", None
133
-
134
- # try:
135
- # query_clean = clean_text(query).lower()
136
- # query_words = set(query_clean.split())
137
-
138
- # scores = []
139
- # for idx, row in data.iterrows():
140
- # score = 0
141
- # name = str(row.get('name', '')).lower()
142
-
143
- # # Check name matches
144
- # for word in query_words:
145
- # if word in name:
146
- # score += 2
147
-
148
- # rating = float(row.get('overall_rating', 0))
149
- # score += rating * 0.5
150
-
151
- # # Add popularity if requested
152
- # if use_popularity:
153
- # pop_score = float(row.get('pop_score', 0))
154
- # score += pop_score * 0.3
155
-
156
- # scores.append(score)
157
-
158
- # top_indices = np.argsort(scores)[-int(num_results):][::-1]
159
- # results = data.iloc[top_indices].copy()
160
- # results['match_score'] = [scores[i] for i in top_indices]
161
-
162
- # map_html = create_paris_map(results)
163
-
164
- # output = f"Found {len(results)} restaurants for '{query}'\n"
165
- # output += f"Data Source: {data_source}\n"
166
- # output += f"Search Method: Keyword Search {'+ Popularity' if use_popularity else ''}\n\n"
167
-
168
- # for idx, (_, row) in enumerate(results.iterrows(), 1):
169
- # name = row.get('name', 'Unknown')
170
- # rating = row.get('overall_rating', 'N/A')
171
- # reviews = row.get('review_count', 'N/A')
172
- # match = row.get('match_score', 0)
173
- # pop_score = row.get('pop_score', 0)
174
-
175
- # output += f"{idx}. **{name}**\n"
176
- # output += f" Rating: {rating} | Reviews: {reviews}\n"
177
- # output += f" Match Score: {match:.2f}"
178
- # if use_popularity:
179
- # output += f" | Popularity: {pop_score:.2f}"
180
- # output += "\n"
181
-
182
- # if 'address' in row and pd.notna(row['address']):
183
- # addr = str(row['address'])[:100]
184
- # output += f" Address: {addr}\n"
185
-
186
- # output += "\n"
187
-
188
- # return output, map_html
189
-
190
- # except Exception as e:
191
- # import traceback
192
- # return f"Error: {str(e)}\n\n{traceback.format_exc()}", None
193
-
194
 
195
  def search_restaurants(query_input, data_source, num_results):
196
- n_candidates = 100
197
  query_clean = clean_text(query_input)
198
  restaurant_ids = get_recommendations(query_clean, n_candidates, num_results, data_source)
199
 
@@ -250,13 +113,6 @@ with gr.Blocks(
250
  )
251
 
252
  with gr.Row():
253
- # with gr.Column(scale=2):
254
- # search_method = gr.Radio(
255
- # choices=["Keyword Search", "Semantic Search"],
256
- # value="Semantic Search" if use_semantic else "Keyword Search",
257
- # label="Search Method",
258
- # info="Semantic uses AI embeddings, Keyword uses exact matches"
259
- # )
260
 
261
  with gr.Column(scale=1):
262
  num_results = gr.Slider(
@@ -266,13 +122,6 @@ with gr.Blocks(
266
  step=5,
267
  label="Results"
268
  )
269
-
270
- # with gr.Column(scale=1):
271
- # use_popularity = gr.Checkbox(
272
- # label="Use Popularity Ranking",
273
- # value=True,
274
- # info="Boost popular restaurants"
275
- # )
276
 
277
  search_btn = gr.Button("Search Restaurants", variant="primary", size="lg")
278
 
@@ -320,7 +169,6 @@ with gr.Blocks(
320
  if __name__ == "__main__":
321
  print("\nStarting Advanced Restaurant Finder...")
322
  print(f"{len(data)} restaurants ready to search")
323
- print(f"Popularity Ranking: Enabled")
324
  print("Opening at http://127.0.0.1:7860\n")
325
 
326
  # if run locally
 
9
  sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'utils'))
10
  from clean_text import clean_text
11
  from semantic_similarity import Encoder
 
12
  from main import get_recommendations
13
 
14
  print("Loading restaurant data...")
15
  data = pd.read_csv("data/toy_data_aggregated_embeddings.csv")
16
  print(f"Loaded {len(data)} restaurants")
17
 
 
 
 
 
 
 
 
 
 
 
18
  # Initialize semantic encoder
19
  print("Loading semantic encoder model...")
20
  try:
 
54
 
55
  return m._repr_html_()
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def search_restaurants(query_input, data_source, num_results):
59
+ n_candidates = 2000
60
  query_clean = clean_text(query_input)
61
  restaurant_ids = get_recommendations(query_clean, n_candidates, num_results, data_source)
62
 
 
113
  )
114
 
115
  with gr.Row():
 
 
 
 
 
 
 
116
 
117
  with gr.Column(scale=1):
118
  num_results = gr.Slider(
 
122
  step=5,
123
  label="Results"
124
  )
 
 
 
 
 
 
 
125
 
126
  search_btn = gr.Button("Search Restaurants", variant="primary", size="lg")
127
 
 
169
  if __name__ == "__main__":
170
  print("\nStarting Advanced Restaurant Finder...")
171
  print(f"{len(data)} restaurants ready to search")
 
172
  print("Opening at http://127.0.0.1:7860\n")
173
 
174
  # if run locally