import html
import os
import sys

import gradio as gr
import pandas as pd
import numpy as np
import folium

# Directory containing this script; used so that paths do not depend on the
# current working directory the app happens to be launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Add utils to path
sys.path.insert(0, os.path.join(BASE_DIR, 'utils'))

from clean_text import clean_text
from semantic_similarity import Encoder
from ranker import compute_bayesian_popularity_score
from main import get_recommendations

# Map centre used for every marker placed by create_paris_map().
PARIS_CENTER = [48.8566, 2.3522]

print("Loading restaurant data...")
# Resolved relative to this file (equivalent to the former "../data/..." when
# the app is started from the script's own directory).
data = pd.read_csv(
    os.path.join(BASE_DIR, "..", "data", "toy_data_aggregated_embeddings.csv")
)
print(f"Loaded {len(data)} restaurants")

# Compute Bayesian popularity scores
print("Computing popularity scores...")
data = compute_bayesian_popularity_score(data)
print("Popularity scores computed")

print("Loading pre-computed embeddings...")
# NOTE(review): the "embedding" column comes straight from read_csv, so its
# cells are presumably strings rather than numeric vectors -- confirm the
# storage format and parse it before relying on this array numerically.
all_desc_embeddings = np.vstack(data["embedding"].values)
print(f"Loaded embeddings with shape {all_desc_embeddings.shape}")

# Initialize semantic encoder
print("Loading semantic encoder model...")
try:
    encoder = Encoder()
    print("Semantic encoder loaded")
except Exception as e:
    # Deliberate best-effort: the app still starts without the encoder.
    # Bind the name so later references see None instead of a NameError.
    encoder = None
    print(f"Warning: Could not load semantic encoder: {e}")
    print("Falling back to keyword-only search")


def _as_float(value, default=0.0):
    """Return ``float(value)``, or ``default`` when it cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _format_score(value):
    """Format ``value`` with two decimals, or 'N/A' if missing/non-numeric."""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        return 'N/A'


def _rating_color(rating):
    """Map a numeric rating to the folium marker colour."""
    if rating >= 4.5:
        return 'green'
    if rating >= 4.0:
        return 'blue'
    if rating >= 3.5:
        return 'orange'
    # Also reached for NaN, because every comparison above is False.
    return 'red'


def create_paris_map(results_df):
    """Create interactive map of Paris restaurants.

    Args:
        results_df: DataFrame with one row per restaurant. ``name`` is
            required; ``overall_rating``, ``review_count`` and ``pop_score``
            are read when present.

    Returns:
        The folium map rendered as an HTML string.

    Marker positions are NOT real locations: each marker is placed at a
    random offset around the centre of Paris, so positions differ between
    calls.
    """
    m = folium.Map(location=PARIS_CENTER, zoom_start=12, tiles='OpenStreetMap')

    for _, row in results_df.iterrows():
        lat_offset = np.random.uniform(-0.05, 0.05)
        lng_offset = np.random.uniform(-0.07, 0.07)
        coords = [PARIS_CENTER[0] + lat_offset, PARIS_CENTER[1] + lng_offset]

        # Missing or non-numeric ratings are treated as 0 (red marker).
        rating = _as_float(row.get('overall_rating', 0))
        color = _rating_color(rating)

        # Explicit <br> separators: bare newlines collapse to spaces in HTML.
        # The name is escaped because it is free text from the data file.
        popup_html = (
            f"<b>{html.escape(str(row['name']))}</b><br>"
            f"Rating: {row.get('overall_rating', 'N/A')}<br>"
            f"Reviews: {row.get('review_count', 'N/A')}<br>"
            f"Popularity Score: {_format_score(row.get('pop_score'))}"
        )

        folium.Marker(
            location=coords,
            popup=folium.Popup(popup_html, max_width=300),
            icon=folium.Icon(color=color, icon='cutlery', prefix='fa')
        ).add_to(m)

    return m._repr_html_()


# ---------------------------------------------------------------------------
# Legacy search implementations, kept commented out for reference.
# NOTE(review): they reference `cosine_similarity` and `use_semantic`, neither
# of which is imported/defined in the visible part of this file.
# ---------------------------------------------------------------------------
# def semantic_search(query, data_source, num_results, use_popularity):
#     """Semantic search using embeddings"""
#     if not query.strip():
#         return "Please enter a search query", None
#     try:
#         query_clean = clean_text(query)
#         # Generate query embedding
#         print(f"Encoding query: {query_clean}")
#         query_embedding = encoder.encode([query_clean], show_progress_bar=False)
#         query_embedding = query_embedding.cpu().numpy()
#         # Compute semantic similarity
#         similarities = cosine_similarity(query_embedding, all_desc_embeddings)[0]
#         # Combine with popularity if requested
#         if use_popularity:
#             sim_normalized = (similarities - similarities.min()) / (similarities.max() - similarities.min() + 1e-10)
#             pop_normalized = (data["pop_score"] - data["pop_score"].min()) / (data["pop_score"].max() - data["pop_score"].min() + 1e-10)
#             # Combined score: 70% semantic, 30% popularity
#             scores = 0.7 * sim_normalized + 0.3 * pop_normalized
#         else:
#             scores = similarities
#         top_indices = np.argsort(scores)[-int(num_results):][::-1]
#         results = data.iloc[top_indices].copy()
#         results['similarity_score'] = scores[top_indices]
#         map_html = create_paris_map(results)
#         output = f"Found {len(results)} restaurants for '{query}'\n"
#         output += f"Data Source: {data_source}\n"
#         output += f"Search Method: Semantic Search {'+ Popularity' if use_popularity else ''}\n\n"
#         for idx, (_, row) in enumerate(results.iterrows(), 1):
#             name = row.get('name', 'Unknown')
#             rating = row.get('overall_rating', 'N/A')
#             reviews = row.get('review_count', 'N/A')
#             similarity = row.get('similarity_score', 0)
#             pop_score = row.get('pop_score', 0)
#             output += f"{idx}. **{name}**\n"
#             output += f" Rating: {rating} | Reviews: {reviews}\n"
#             output += f" Match: {similarity:.3f}"
#             if use_popularity:
#                 output += f" | Popularity: {pop_score:.2f}"
#             output += "\n"
#             if 'address' in row and pd.notna(row['address']):
#                 addr = str(row['address'])[:100]
#                 output += f" Address: {addr}\n"
#             output += "\n"
#         return output, map_html
#     except Exception as e:
#         import traceback
#         return f"Error: {str(e)}\n\n{traceback.format_exc()}", None

# def keyword_search(query, data_source, num_results, use_popularity):
#     """Keyword-based search with optional popularity ranking"""
#     if not query.strip():
#         return "Please enter a search query", None
#     try:
#         query_clean = clean_text(query).lower()
#         query_words = set(query_clean.split())
#         scores = []
#         for idx, row in data.iterrows():
#             score = 0
#             name = str(row.get('name', '')).lower()
#             # Check name matches
#             for word in query_words:
#                 if word in name:
#                     score += 2
#             rating = float(row.get('overall_rating', 0))
#             score += rating * 0.5
#             # Add popularity if requested
#             if use_popularity:
#                 pop_score = float(row.get('pop_score', 0))
#                 score += pop_score * 0.3
#             scores.append(score)
#         top_indices = np.argsort(scores)[-int(num_results):][::-1]
#         results = data.iloc[top_indices].copy()
#         results['match_score'] = [scores[i] for i in top_indices]
#         map_html = create_paris_map(results)
#         output = f"Found {len(results)} restaurants for '{query}'\n"
#         output += f"Data Source: {data_source}\n"
#         output += f"Search Method: Keyword Search {'+ Popularity' if use_popularity else ''}\n\n"
#         for idx, (_, row) in enumerate(results.iterrows(), 1):
#             name = row.get('name', 'Unknown')
#             rating = row.get('overall_rating', 'N/A')
#             reviews = row.get('review_count', 'N/A')
#             match = row.get('match_score', 0)
#             pop_score = row.get('pop_score', 0)
#             output += f"{idx}. 
# (continuation of the commented-out legacy keyword_search implementation)
# **{name}**\n"
#             output += f" Rating: {rating} | Reviews: {reviews}\n"
#             output += f" Match Score: {match:.2f}"
#             if use_popularity:
#                 output += f" | Popularity: {pop_score:.2f}"
#             output += "\n"
#             if 'address' in row and pd.notna(row['address']):
#                 addr = str(row['address'])[:100]
#                 output += f" Address: {addr}\n"
#             output += "\n"
#         return output, map_html
#     except Exception as e:
#         import traceback
#         return f"Error: {str(e)}\n\n{traceback.format_exc()}", None

# def search_restaurants(query, data_source, search_method, num_results, use_popularity):
#     """Main search function that routes to appropriate search method"""
#     if search_method == "Semantic Search" and use_semantic:
#         return semantic_search(query, data_source, num_results, use_popularity)
#     else:
#         return keyword_search(query, data_source, num_results, use_popularity)


def search_restaurants(query_input, data_source, num_results):
    """Run a restaurant search for the Gradio UI.

    Args:
        query_input: Free-text query typed by the user.
        data_source: Value of the "Data Source" dropdown. Currently unused by
            this function; kept so the UI wiring stays unchanged.
        num_results: Number of results requested via the slider.

    Returns:
        Whatever ``get_recommendations`` returns for the cleaned query
        (presumably a ``(results_text, map_html)`` pair, since the UI binds
        two outputs -- confirm against ``main.get_recommendations``). For an
        empty or whitespace-only query, a prompt message and ``None`` for the
        map.
    """
    # Same guard the legacy search functions used: do not search on nothing.
    if not query_input or not query_input.strip():
        return "Please enter a search query", None

    n_candidates = 100
    query_clean = clean_text(query_input)
    # Sliders may deliver the value as a float; normalise to an int count.
    return get_recommendations(query_clean, n_candidates, int(num_results))


# Create Gradio interface
with gr.Blocks(title="Restaurant Finder", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # Advanced Restaurant Recommendation System
    ### Search Through 5,000+ Restaurants with AI-Powered Semantic Search
    Find restaurants using semantic understanding and popularity ranking!
    """)

    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="e.g., Italian pasta, best sushi, romantic dinner, family-friendly pizza",
                lines=2
            )
        with gr.Column(scale=2):
            data_source = gr.Dropdown(
                choices=["Michelin", "Google", "Yelp"],
                value="Yelp",
                label="Data Source",
                info="Select restaurant data source"
            )

    with gr.Row():
        # with gr.Column(scale=2):
        #     search_method = gr.Radio(
        #         choices=["Keyword Search", "Semantic Search"],
        #         value="Semantic Search" if use_semantic else "Keyword Search",
        #         label="Search Method",
        #         info="Semantic uses AI embeddings, Keyword uses exact matches"
        #     )
        with gr.Column(scale=1):
            num_results = gr.Slider(
                minimum=5,
                maximum=30,
                value=10,
                step=5,
                label="Results"
            )
        # with gr.Column(scale=1):
        #     use_popularity = gr.Checkbox(
        #         label="Use Popularity Ranking",
        #         value=True,
        #         info="Boost popular restaurants"
        #     )

    search_btn = gr.Button("Search Restaurants", variant="primary", size="lg")

    with gr.Row():
        with gr.Column(scale=1):
            results_output = gr.Textbox(
                label="Restaurant Results",
                lines=20,
                max_lines=30
            )
        with gr.Column(scale=1):
            map_output = gr.HTML(
                label="Paris Map"
            )

    gr.Markdown("### Try These Examples:")

    # Every result count must be reachable on the slider (5..30, step 5).
    examples = [
        ["Italian pasta", "Yelp", 10],
        ["sushi", "Michelin", 10],
        ["romantic dinner", "Google", 10],
        ["family-friendly pizza", "Yelp", 10],
        ["best seafood", "Michelin", 10],
        ["cheap burger", "Google", 10],
    ]

    # Shared by the examples, the button click and the textbox submit so the
    # three stay in sync.
    search_inputs = [query_input, data_source, num_results]
    search_outputs = [results_output, map_output]

    gr.Examples(
        examples=examples,
        inputs=search_inputs
    )

    search_btn.click(
        fn=search_restaurants,
        inputs=search_inputs,
        outputs=search_outputs
    )

    query_input.submit(
        fn=search_restaurants,
        inputs=search_inputs,
        outputs=search_outputs
    )


if __name__ == "__main__":
    # Single source of truth for the address shown in the banner and used by
    # launch().
    server_name = "127.0.0.1"
    server_port = 7860

    print("\nStarting Advanced Restaurant Finder...")
    print(f"{len(data)} restaurants ready to search")
    print("Popularity Ranking: Enabled")
    print(f"Opening at http://{server_name}:{server_port}\n")

    app.launch(
        share=False,
        server_name=server_name,
        server_port=server_port,
        inbrowser=True
    )