MatanYehudaDataAnalyst committed on
Commit
3ba73d3
·
verified ·
1 Parent(s): eb9d187

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -136
app.py CHANGED
@@ -7,186 +7,147 @@ from sentence_transformers import SentenceTransformer
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
9
  # ==========================================
10
- # 1. INITIALIZATION & DATA LOADING
11
  # ==========================================
12
-
13
- # NOTE: We use relative paths because the files are in the same Hugging Face Space
14
  csv_path = "cleaned_dataset_10k.csv"
15
  pkl_path = "final_embeddings_10k.pkl"
16
 
17
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
18
- raise FileNotFoundError("❌ Missing files! Please upload 'cleaned_dataset_10k.csv' and 'final_embeddings_10k.pkl' to the Files tab.")
19
 
20
  # Load Data
21
  df = pd.read_csv(csv_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  with open(pkl_path, 'rb') as f:
23
  embedding_data = pickle.load(f)
24
  dataset_embeddings = embedding_data['embeddings']
25
 
26
- # Load the model
27
- # NOTE: Using the model Gal specified.
28
- # If you get a "dimension mismatch" error, change this back to 'sentence-transformers/all-MiniLM-L6-v2'
29
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
30
 
31
- # Pre-calculate Persona Taste Centers (Mean vectors)
32
- # This finds the "average" taste for each type of reviewer in your data
33
  persona_profiles = {}
34
- if 'reviewer_persona' in df.columns:
35
- for persona in df['reviewer_persona'].unique():
36
- indices = df[df['reviewer_persona'] == persona].index
37
- # We must ensure we only take embeddings that exist in the dataframe indices
38
  valid_indices = [i for i in indices if i < len(dataset_embeddings)]
39
  if valid_indices:
40
  persona_vectors = dataset_embeddings[valid_indices]
41
  persona_profiles[persona] = np.mean(persona_vectors, axis=0)
42
  else:
43
- # Fallback if 'reviewer_persona' column is missing, just use global average
44
  persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
45
 
46
  # ==========================================
47
- # 2. DESIGN SYSTEM (VEN BRANDING)
48
  # ==========================================
49
  ven_css = """
50
- body {
51
- background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important;
52
- font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
53
- }
54
- .ven-card {
55
- background: white; border: 1px solid rgba(15,23,42,0.08);
56
- border-radius: 24px; box-shadow: 0 20px 40px rgba(2,6,23,0.08);
57
- overflow: hidden; padding: 0; transition: transform 0.3s ease;
58
- }
59
- .ven-badge {
60
- width: 48px; height: 48px; border-radius: 16px; display: grid; place-items: center;
61
- background: linear-gradient(135deg, #006CE4, #3b82f6); color: white; font-weight: 900;
62
- }
63
- .ven-chip {
64
- padding: 6px 14px; border-radius: 100px; font-size: 12px; font-weight: 700;
65
- background: #f1f5f9; color: #475569; border: 1px solid #e2e8f0;
66
- }
67
- .ven-bar-bg { height: 8px; border-radius: 100px; background: #f1f5f9; margin-top: 8px; }
68
- .ven-bar-fill { height: 100%; border-radius: 100px; background: #006CE4; }
69
- .ven-btn {
70
- background: #006CE4; color: white !important; border: none;
71
- padding: 14px 28px; border-radius: 14px; font-weight: 800; cursor: pointer;
72
- width: 100%; transition: opacity 0.2s;
73
- }
74
- .ven-btn:hover { opacity: 0.9; }
75
  """
76
 
77
  # ==========================================
78
- # 3. COMPONENT GENERATORS
79
  # ==========================================
80
- def format_recommendation_ui(res_name, rating, persona, score, review):
81
- match_pct = int(score * 100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- # Safety check for review text
84
- review_display = review[:160] + "..." if isinstance(review, str) else "Great place!"
85
-
86
  return f"""
87
  <div class="ven-card">
88
- <div style="padding: 24px;">
89
- <div style="display: flex; justify-content: space-between; align-items: flex-start;">
90
- <div style="display: flex; gap: 16px;">
91
- <div class="ven-badge">✨</div>
92
- <div>
93
- <h2 style="margin:0; font-size:22px; font-weight:900; color:#0f172a;">{res_name}</h2>
94
- <div style="margin-top:8px; display:flex; gap:8px;">
95
- <span class="ven-chip" style="background:#fff7ed; color:#c2410c; border-color:#fed7aa;">Top pick for {persona}</span>
96
- </div>
97
- </div>
98
- </div>
99
- <div style="text-align: right;">
100
- <div style="font-size:28px; font-weight:900; color:#006CE4;">{rating:.1f}</div>
101
- <div style="font-size:12px; font-weight:700; color:#94a3b8;">RATING</div>
102
- </div>
103
  </div>
104
-
105
- <div style="margin-top:24px;">
106
- <div style="display:flex; justify-content:space-between; font-weight:800; font-size:14px;">
107
- <span>VEN Match Confidence</span>
108
- <span style="color:#006CE4;">{match_pct}%</span>
109
- </div>
110
- <div class="ven-bar-bg"><div class="ven-bar-fill" style="width:{match_pct}%"></div></div>
111
- </div>
112
-
113
- <div style="margin-top:24px; padding:16px; background:#f8fafc; border-radius:16px;">
114
- <p style="margin:0; font-size:14px; line-height:1.6; color:#334155;">
115
- <b>Why it's a match:</b> Based on your context, this venue aligns with the preferences of our <b>{persona}</b> profile.
116
- Users said: "<i>{review_display}</i>"
117
- </p>
118
- </div>
119
-
120
- <div style="margin-top:24px;">
121
- <button class="ven-btn">Reserve with VEN Exclusive</button>
122
  </div>
123
  </div>
 
 
 
124
  </div>
125
  """
126
 
127
  # ==========================================
128
- # 4. LOGIC ENGINE
129
  # ==========================================
130
- def run_ven_engine(budget, dietary, company, purpose, noise):
131
- # Construct descriptive bio
132
- user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
133
-
134
- # Semantic Search
135
- query_vec = model.encode([user_context])
136
 
137
- # Find closest persona
138
- similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0]
139
- for p, v in persona_profiles.items()}
140
- closest_persona = max(similarities, key=similarities.get)
141
-
142
- # Filter data for that persona
143
- persona_df = df[df['reviewer_persona'] == closest_persona]
144
-
145
- # Safety: If no restaurants found for this persona, pick from the whole list
146
- if persona_df.empty:
147
- persona_df = df
148
-
149
- # Get highest rated in that group
150
- top_match = persona_df.sort_values(by='Rating', ascending=False).iloc[0]
151
-
152
- return format_recommendation_ui(
153
- top_match['Restaurant Name'],
154
- top_match['Rating'],
155
- closest_persona,
156
- similarities[closest_persona],
157
- top_match['Review']
158
- )
159
-
160
- # ==========================================
161
- # 5. UI LAYOUT
162
- # ==========================================
163
- with gr.Blocks(css=ven_css, title="VEN — AI Matchmaker") as demo:
164
- gr.HTML("<div style='text-align:center; padding: 40px 0;'><h1 style='font-size:36px; font-weight:950; color:#0f172a;'>VEN</h1><p style='color:#64748b; font-weight:600;'>Semantic Restaurant Discovery for Tel Aviv</p></div>")
165
-
166
  with gr.Row():
167
- with gr.Column(scale=1):
168
- with gr.Group():
169
- gr.Markdown("### 🔍 Filter your Vibe")
170
- in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
171
- in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
172
- in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
173
- in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
174
- in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
175
-
176
- search_btn = gr.Button("Find My Table", variant="primary")
177
-
178
- with gr.Column(scale=1.5):
179
- gr.Markdown("### 🎯 Your Personal Match")
180
- output_ui = gr.HTML("<div style='text-align:center; padding:100px; color:#cbd5e1; font-weight:600; border:2px dashed #e2e8f0; border-radius:24px;'>Adjust the filters to generate your AI recommendation</div>")
181
-
182
- search_btn.click(
183
- fn=run_ven_engine,
 
 
 
184
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
185
- outputs=output_ui
 
 
186
  )
187
 
188
- # ==========================================
189
- # 6. LAUNCH
190
- # ==========================================
191
  if __name__ == "__main__":
192
  demo.launch()
 
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
9
  # ==========================================
10
+ # 1. SETUP & DATA LOADING
11
  # ==========================================
 
 
12
  csv_path = "cleaned_dataset_10k.csv"
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
+ raise FileNotFoundError("❌ Missing files! Please upload 'cleaned_dataset_10k.csv' and 'final_embeddings_10k.pkl'")
17
 
18
  # Load Data
19
  df = pd.read_csv(csv_path)
20
+
21
+ # --- SAFETY FIX: Normalize Column Names ---
22
+ # This ensures it works whether your CSV has "Restaurant Name" or "restaurant_name"
23
+ df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
24
+
25
+ # Check for required columns and fallback if missing
26
+ def get_col(candidates, default):
27
+ for c in candidates:
28
+ if c in df.columns: return c
29
+ return default
30
+
31
+ col_name = get_col(['restaurant_name', 'name', 'place'], 'restaurant_name')
32
+ col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
33
+ col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
34
+ col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
35
+
36
+ # Load Embeddings
37
  with open(pkl_path, 'rb') as f:
38
  embedding_data = pickle.load(f)
39
  dataset_embeddings = embedding_data['embeddings']
40
 
41
+ # Load Model
 
 
42
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
43
 
44
+ # Calculate Persona Profiles
 
45
  persona_profiles = {}
46
+ if col_persona in df.columns:
47
+ for persona in df[col_persona].unique():
48
+ if pd.isna(persona): continue
49
+ indices = df[df[col_persona] == persona].index
50
  valid_indices = [i for i in indices if i < len(dataset_embeddings)]
51
  if valid_indices:
52
  persona_vectors = dataset_embeddings[valid_indices]
53
  persona_profiles[persona] = np.mean(persona_vectors, axis=0)
54
  else:
55
+ # Fallback if no persona column exists
56
  persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
57
 
58
  # ==========================================
59
+ # 2. UI DESIGN (VEN BRANDING)
60
  # ==========================================
61
  ven_css = """
62
+ body { background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important; font-family: sans-serif !important; }
63
+ .ven-card { background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; box-shadow: 0 10px 30px -10px rgba(0,0,0,0.1); }
64
+ .ven-header { font-size: 24px; font-weight: 800; color: #1e293b; margin-bottom: 5px; }
65
+ .ven-sub { font-size: 14px; color: #64748b; font-weight: 600; margin-bottom: 20px; }
66
+ .ven-score { font-size: 32px; font-weight: 900; color: #2563eb; }
67
+ .ven-btn { background: #2563eb; color: white; border: none; font-weight: 700; border-radius: 12px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  """
69
 
70
  # ==========================================
71
+ # 3. LOGIC ENGINE
72
  # ==========================================
73
+ def run_ven_engine(budget, dietary, company, purpose, noise):
74
+ # 1. Create a search query from the dropdowns
75
+ user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
76
+
77
+ # 2. Encode the query
78
+ query_vec = model.encode([user_context])
79
+
80
+ # 3. Find the closest Persona (Cluster)
81
+ similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
82
+ closest_persona = max(similarities, key=similarities.get)
83
+
84
+ # 4. Filter data for that persona
85
+ if col_persona in df.columns:
86
+ persona_df = df[df[col_persona] == closest_persona]
87
+ if persona_df.empty: persona_df = df
88
+ else:
89
+ persona_df = df
90
+
91
+ # 5. Get the highest rated restaurant in that group
92
+ top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
93
+
94
+ # 6. Format the output HTML
95
+ match_pct = int(similarities[closest_persona] * 100)
96
+ review_text = str(top_match[col_review])[:180] + "..."
97
 
 
 
 
98
  return f"""
99
  <div class="ven-card">
100
+ <div style="display:flex; justify-content:space-between;">
101
+ <div>
102
+ <div class="ven-header">{top_match[col_name]}</div>
103
+ <div class="ven-sub">Top Match for {closest_persona}</div>
 
 
 
 
 
 
 
 
 
 
 
104
  </div>
105
+ <div style="text-align:right;">
106
+ <div class="ven-score">{top_match[col_rating]}</div>
107
+ <div style="font-size:12px; font-weight:bold; color:#94a3b8;">RATING</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  </div>
109
  </div>
110
+ <hr style="border:0; border-top:1px solid #f1f5f9; margin: 15px 0;">
111
+ <p style="color:#334155; line-height:1.6;"><i>"{review_text}"</i></p>
112
+ <div style="margin-top:15px; font-size:13px; font-weight:700; color:#2563eb;">Match Confidence: {match_pct}%</div>
113
  </div>
114
  """
115
 
116
  # ==========================================
117
+ # 4. APP LAYOUT
118
  # ==========================================
119
+ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
120
+ gr.Markdown("# 🍔 VEN: Restaurant Matchmaker")
121
+ gr.Markdown("Select your vibe below to get a personalized recommendation.")
 
 
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  with gr.Row():
124
+ with gr.Column():
125
+ in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
126
+ in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
127
+ in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
128
+ in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite"], label="Occasion", value="Casual dinner")
129
+ in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
130
+
131
+ btn = gr.Button("Find My Table", variant="primary")
132
+
133
+ with gr.Column():
134
+ output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
135
+
136
+ # --- STEP 7: REQUIRED QUICK STARTERS ---
137
+ gr.Markdown("### 🚀 One-Click Examples (Quick Starters)")
138
+ gr.Examples(
139
+ examples=[
140
+ ["Budget-friendly", "Vegetarian", "Friends", "Quick bite", "Moderate/Social"],
141
+ ["Premium", "Meat-lover", "Date/Couple", "Special occasion", "Quiet/Intimate"],
142
+ ["Mid-range", "Anything", "Business", "Professional meeting", "Quiet/Intimate"]
143
+ ],
144
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
145
+ outputs=output_ui,
146
+ fn=run_ven_engine,
147
+ cache_examples=True, # This pre-runs the examples so they are instant!
148
  )
149
 
150
+ btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
151
+
 
152
  if __name__ == "__main__":
153
  demo.launch()