MatanYehudaDataAnalyst committed on
Commit
ae0fc19
·
verified ·
1 Parent(s): 4f6f03e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -9,27 +9,22 @@ from sklearn.metrics.pairwise import cosine_similarity
9
  # ==========================================
10
  # 1. SETUP & DATA LOADING
11
  # ==========================================
12
- # We use the EXACT filenames you provided
13
  csv_path = "cleaned_dataset_10k.csv"
14
  pkl_path = "final_embeddings_10k.pkl"
15
 
16
- # Check if files exist to prevent crashing
17
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
18
  raise FileNotFoundError(f"Error: Files not found. I see: {os.listdir('.')}")
19
 
20
  # Load Data
21
  df = pd.read_csv(csv_path)
22
-
23
- # Normalize column names (fixes 'Restaurant Name' vs 'restaurant_name' issues)
24
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
25
 
26
- # Helper to find the right column names
27
  def get_col(candidates, default):
28
  for c in candidates:
29
  if c in df.columns: return c
30
  return default
31
 
32
- # Map your CSV columns to what the app needs
33
  col_name = get_col(['restaurant_name', 'name', 'place'], 'restaurant_name')
34
  col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
35
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
@@ -38,7 +33,6 @@ col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona
38
  # Load Embeddings
39
  with open(pkl_path, 'rb') as f:
40
  embedding_data = pickle.load(f)
41
- # Handle if pickle is a dictionary or direct array
42
  if isinstance(embedding_data, dict) and 'embeddings' in embedding_data:
43
  dataset_embeddings = embedding_data['embeddings']
44
  else:
@@ -47,7 +41,7 @@ with open(pkl_path, 'rb') as f:
47
  # Load Model
48
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
49
 
50
- # Calculate Persona Profiles
51
  persona_profiles = {}
52
  if col_persona in df.columns:
53
  for persona in df[col_persona].unique():
@@ -64,27 +58,20 @@ else:
64
  # 2. LOGIC ENGINE
65
  # ==========================================
66
  def run_ven_engine(budget, dietary, company, purpose, noise):
67
- # 1. Create a search query
68
  user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
69
-
70
- # 2. Encode query
71
  query_vec = model.encode([user_context])
72
 
73
- # 3. Find closest Persona
74
  similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
75
  closest_persona = max(similarities, key=similarities.get)
76
 
77
- # 4. Filter data
78
  if col_persona in df.columns:
79
  persona_df = df[df[col_persona] == closest_persona]
80
  if persona_df.empty: persona_df = df
81
  else:
82
  persona_df = df
83
 
84
- # 5. Get top result
85
  top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
86
 
87
- # 6. Format Output
88
  match_pct = int(similarities[closest_persona] * 100)
89
  review_text = str(top_match[col_review])[:160] + "..."
90
 
@@ -119,14 +106,16 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
119
  in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
120
  in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
121
  in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
122
- in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite"], label="Occasion", value="Casual dinner")
 
 
 
123
  in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
124
  btn = gr.Button("Find My Table", variant="primary")
125
 
126
  with gr.Column():
127
  output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
128
 
129
- # --- THIS IS STEP 7: ONE-CLICK STARTERS ---
130
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
131
  gr.Examples(
132
  examples=[
 
9
  # ==========================================
10
  # 1. SETUP & DATA LOADING
11
  # ==========================================
 
12
  csv_path = "cleaned_dataset_10k.csv"
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
  raise FileNotFoundError(f"Error: Files not found. I see: {os.listdir('.')}")
17
 
18
  # Load Data
19
  df = pd.read_csv(csv_path)
 
 
20
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
21
 
22
+ # Helper to find column names
23
  def get_col(candidates, default):
24
  for c in candidates:
25
  if c in df.columns: return c
26
  return default
27
 
 
28
  col_name = get_col(['restaurant_name', 'name', 'place'], 'restaurant_name')
29
  col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
30
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
 
33
  # Load Embeddings
34
  with open(pkl_path, 'rb') as f:
35
  embedding_data = pickle.load(f)
 
36
  if isinstance(embedding_data, dict) and 'embeddings' in embedding_data:
37
  dataset_embeddings = embedding_data['embeddings']
38
  else:
 
41
  # Load Model
42
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
43
 
44
+ # Calculate Personas
45
  persona_profiles = {}
46
  if col_persona in df.columns:
47
  for persona in df[col_persona].unique():
 
58
  # 2. LOGIC ENGINE
59
  # ==========================================
60
  def run_ven_engine(budget, dietary, company, purpose, noise):
 
61
  user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
 
 
62
  query_vec = model.encode([user_context])
63
 
 
64
  similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
65
  closest_persona = max(similarities, key=similarities.get)
66
 
 
67
  if col_persona in df.columns:
68
  persona_df = df[df[col_persona] == closest_persona]
69
  if persona_df.empty: persona_df = df
70
  else:
71
  persona_df = df
72
 
 
73
  top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
74
 
 
75
  match_pct = int(similarities[closest_persona] * 100)
76
  review_text = str(top_match[col_review])[:160] + "..."
77
 
 
106
  in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
107
  in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
108
  in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
109
+
110
+ # --- FIX IS HERE: Added "Professional meeting" to the list ---
111
+ in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
112
+
113
  in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
114
  btn = gr.Button("Find My Table", variant="primary")
115
 
116
  with gr.Column():
117
  output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
118
 
 
119
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
120
  gr.Examples(
121
  examples=[