Spaces:

MatanYehudaDataAnalyst
/

Finalproject_VEN

Sleeping

App Files Files Community

MatanYehudaDataAnalyst committed on Jan 15

Commit

0d94b00

verified ·

1 Parent(s): 1079e48

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -46

app.py CHANGED Viewed

@@ -3,35 +3,34 @@ import pandas as pd
 import numpy as np
 import pickle
 import os
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
 # ==========================================
 # 1. SETUP & DATA LOADING
 # ==========================================
 csv_path = "cleaned_dataset_10k.csv"
 pkl_path = "final_embeddings_10k.pkl"
 if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
-    raise FileNotFoundError("❌ Missing files! Please upload 'cleaned_dataset_10k.csv' and 'final_embeddings_10k.pkl'")
-# Load Data
 df = pd.read_csv(csv_path)
-# --- SAFETY FIX: Normalize Column Names ---
-# This ensures it works whether your CSV has "Restaurant Name" or "restaurant_name"
 df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
-# Check for required columns and fallback if missing
 def get_col(candidates, default):
     """Return the first name in `candidates` that is a column of `df`.

     `candidates` is scanned in order; if none of its entries appears in
     `df.columns`, `default` is returned unchanged (it is not checked
     against the DataFrame).
     """
     return next((name for name in candidates if name in df.columns), default)
-col_name = get_col(['restaurant_name', 'name', 'place'], 'restaurant_name')
 col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
 col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
-col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
 # Load Embeddings
 with open(pkl_path, 'rb') as f:
@@ -41,69 +40,66 @@ with open(pkl_path, 'rb') as f:
 # Load Model
 model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
-# Calculate Persona Profiles
 persona_profiles = {}
 if col_persona in df.columns:
     for persona in df[col_persona].unique():
         if pd.isna(persona): continue
         indices = df[df[col_persona] == persona].index
         valid_indices = [i for i in indices if i < len(dataset_embeddings)]
         if valid_indices:
             persona_vectors = dataset_embeddings[valid_indices]
             persona_profiles[persona] = np.mean(persona_vectors, axis=0)
 else:
-    # Fallback if no persona column exists
     persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
 # ==========================================
-# 2. UI DESIGN (VEN BRANDING)
-# ==========================================
-ven_css = """
-body { background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important; font-family: sans-serif !important; }
-.ven-card { background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; box-shadow: 0 10px 30px -10px rgba(0,0,0,0.1); }
-.ven-header { font-size: 24px; font-weight: 800; color: #1e293b; margin-bottom: 5px; }
-.ven-sub { font-size: 14px; color: #64748b; font-weight: 600; margin-bottom: 20px; }
-.ven-score { font-size: 32px; font-weight: 900; color: #2563eb; }
-.ven-btn { background: #2563eb; color: white; border: none; font-weight: 700; border-radius: 12px; }
-"""
-# ==========================================
-# 3. LOGIC ENGINE
 # ==========================================
 def run_ven_engine(budget, dietary, company, purpose, noise):
-    # 1. Create a search query from the dropdowns
     user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
-    # 2. Encode the query
-    query_vec = model.encode([user_context])
-    # 3. Find the closest Persona (Cluster)
-    similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
-    closest_persona = max(similarities, key=similarities.get)
-    # 4. Filter data for that persona
     if col_persona in df.columns:
         persona_df = df[df[col_persona] == closest_persona]
         if persona_df.empty: persona_df = df
     else:
         persona_df = df
-    # 5. Get the highest rated restaurant in that group
     top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
-    # 6. Format the output HTML
-    match_pct = int(similarities[closest_persona] * 100)
     review_text = str(top_match[col_review])[:180] + "..."
     return f"""
-    <div class="ven-card">
         <div style="display:flex; justify-content:space-between;">
             <div>
-                <div class="ven-header">{top_match[col_name]}</div>
-                <div class="ven-sub">Top Match for {closest_persona}</div>
             </div>
             <div style="text-align:right;">
-                <div class="ven-score">{top_match[col_rating]}</div>
                 <div style="font-size:12px; font-weight:bold; color:#94a3b8;">RATING</div>
             </div>
         </div>
@@ -114,11 +110,12 @@ def run_ven_engine(budget, dietary, company, purpose, noise):
     """
 # ==========================================
-# 4. APP LAYOUT
 # ==========================================
 with gr.Blocks(css=ven_css, title="VEN Project") as demo:
     gr.Markdown("# 🍔 VEN: Restaurant Matchmaker")
-    gr.Markdown("Select your vibe below to get a personalized recommendation.")
     with gr.Row():
         with gr.Column():
@@ -127,14 +124,12 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
             in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
             in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite"], label="Occasion", value="Casual dinner")
             in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
             btn = gr.Button("Find My Table", variant="primary")
         with gr.Column():
             output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
-    # --- STEP 7: REQUIRED QUICK STARTERS ---
-    gr.Markdown("### 🚀 One-Click Examples (Quick Starters)")
     gr.Examples(
         examples=[
             ["Budget-friendly", "Vegetarian", "Friends", "Quick bite", "Moderate/Social"],
@@ -144,9 +139,8 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
         inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
         outputs=output_ui,
         fn=run_ven_engine,
-        cache_examples=True, # This pre-runs the examples so they are instant!
     )
     btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
 if __name__ == "__main__":

 import numpy as np
 import pickle
 import os
+from sentence_transformers import SentenceTransformer, util
+import torch
 # ==========================================
 # 1. SETUP & DATA LOADING
 # ==========================================
+# NOTE: Check your file names exactly!
 csv_path = "cleaned_dataset_10k.csv"
 pkl_path = "final_embeddings_10k.pkl"
 if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
+    # This error usually means the file names in the 'Files' tab are different
+    raise FileNotFoundError(f"❌ FILES NOT FOUND. I see these files: {os.listdir('.')}")
+# Load Data & Normalize Columns
 df = pd.read_csv(csv_path)
 df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
+# Helper to find columns even if names vary slightly
 def get_col(candidates, default):
     """Return the first name in `candidates` that is a column of `df`.

     `candidates` is scanned in order; if none of its entries appears in
     `df.columns`, `default` is returned unchanged (it is not checked
     against the DataFrame).
     """
     return next((name for name in candidates if name in df.columns), default)
+col_name = get_col(['restaurant_name', 'name'], 'restaurant_name')
 col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
 col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
+col_persona = get_col(['reviewer_persona', 'persona'], 'reviewer_persona')
 # Load Embeddings
 with open(pkl_path, 'rb') as f:
 # Load Model
 model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
+# Calculate Persona Profiles (Average Vectors)
 persona_profiles = {}
 if col_persona in df.columns:
     for persona in df[col_persona].unique():
         if pd.isna(persona): continue
         indices = df[df[col_persona] == persona].index
+        # valid_indices ensures we don't crash if indices mismatch
         valid_indices = [i for i in indices if i < len(dataset_embeddings)]
         if valid_indices:
             persona_vectors = dataset_embeddings[valid_indices]
+            # Average this persona's embedding vectors with NumPy
             persona_profiles[persona] = np.mean(persona_vectors, axis=0)
 else:
     persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
 # ==========================================
+# 2. LOGIC ENGINE (uses sentence_transformers.util instead of scikit-learn)
 # ==========================================
 def run_ven_engine(budget, dietary, company, purpose, noise):
+    # 1. Create a search query
     user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
+    # 2. Encode query
+    query_vec = model.encode(user_context, convert_to_tensor=True)
+    # 3. Find closest Persona using Sentence-Transformers Utility (No Sklearn needed)
+    best_score = -1
+    closest_persona = list(persona_profiles.keys())[0]
+    for persona, profile_vec in persona_profiles.items():
+        # Convert profile to tensor for comparison
+        profile_tensor = torch.tensor(profile_vec)
+        score = util.cos_sim(query_vec, profile_tensor).item()
+        if score > best_score:
+            best_score = score
+            closest_persona = persona
+    # 4. Filter data
     if col_persona in df.columns:
         persona_df = df[df[col_persona] == closest_persona]
         if persona_df.empty: persona_df = df
     else:
         persona_df = df
+    # 5. Get top result
     top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
+    # 6. Format Output
     review_text = str(top_match[col_review])[:180] + "..."
+    match_pct = int(best_score * 100)
     return f"""
+    <div style="background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; box-shadow: 0 10px 30px -10px rgba(0,0,0,0.1);">
         <div style="display:flex; justify-content:space-between;">
             <div>
+                <div style="font-size: 24px; font-weight: 800; color: #1e293b;">{top_match[col_name]}</div>
+                <div style="font-size: 14px; color: #64748b; font-weight: 600;">Top Match for {closest_persona}</div>
             </div>
             <div style="text-align:right;">
+                <div style="font-size: 32px; font-weight: 900; color: #2563eb;">{top_match[col_rating]}</div>
                 <div style="font-size:12px; font-weight:bold; color:#94a3b8;">RATING</div>
             </div>
         </div>
     """
 # ==========================================
+# 3. APP UI
 # ==========================================
+ven_css = """body { background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important; font-family: sans-serif !important; }"""
 with gr.Blocks(css=ven_css, title="VEN Project") as demo:
     gr.Markdown("# 🍔 VEN: Restaurant Matchmaker")
     with gr.Row():
         with gr.Column():
             in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
             in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite"], label="Occasion", value="Casual dinner")
             in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
             btn = gr.Button("Find My Table", variant="primary")
         with gr.Column():
             output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
+    gr.Markdown("### 🚀 One-Click Examples")
     gr.Examples(
         examples=[
             ["Budget-friendly", "Vegetarian", "Friends", "Quick bite", "Moderate/Social"],
         inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
         outputs=output_ui,
         fn=run_ven_engine,
+        cache_examples=True,
     )
     btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
 if __name__ == "__main__":