Spaces:

MatanYehudaDataAnalyst
/

Finalproject_VEN

Sleeping

App Files Files Community

MatanYehudaDataAnalyst committed on Jan 15

Commit

3ba73d3

verified ·

1 Parent(s): eb9d187

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -136

app.py CHANGED Viewed

@@ -7,186 +7,147 @@ from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 # ==========================================
-# 1. INITIALIZATION & DATA LOADING
 # ==========================================
-# NOTE: We use relative paths because the files are in the same Hugging Face Space
 csv_path = "cleaned_dataset_10k.csv"
 pkl_path = "final_embeddings_10k.pkl"
 if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
-    raise FileNotFoundError("❌ Missing files! Please upload 'cleaned_dataset_10k.csv' and 'final_embeddings_10k.pkl' to the Files tab.")
 # Load Data
 df = pd.read_csv(csv_path)
 with open(pkl_path, 'rb') as f:
     embedding_data = pickle.load(f)
     dataset_embeddings = embedding_data['embeddings']
-# Load the model
-# NOTE: Using the model Gal specified.
-# If you get a "dimension mismatch" error, change this back to 'sentence-transformers/all-MiniLM-L6-v2'
 model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
-# Pre-calculate Persona Taste Centers (Mean vectors)
-# This finds the "average" taste for each type of reviewer in your data
 persona_profiles = {}
-if 'reviewer_persona' in df.columns:
-    for persona in df['reviewer_persona'].unique():
-        indices = df[df['reviewer_persona'] == persona].index
-        # We must ensure we only take embeddings that exist in the dataframe indices
         valid_indices = [i for i in indices if i < len(dataset_embeddings)]
         if valid_indices:
             persona_vectors = dataset_embeddings[valid_indices]
             persona_profiles[persona] = np.mean(persona_vectors, axis=0)
 else:
-    # Fallback if 'reviewer_persona' column is missing, just use global average
     persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
 # ==========================================
-# 2. DESIGN SYSTEM (VEN BRANDING)
 # ==========================================
 ven_css = """
-body {
-    background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important;
-    font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
-}
-.ven-card {
-    background: white; border: 1px solid rgba(15,23,42,0.08);
-    border-radius: 24px; box-shadow: 0 20px 40px rgba(2,6,23,0.08);
-    overflow: hidden; padding: 0; transition: transform 0.3s ease;
-}
-.ven-badge {
-    width: 48px; height: 48px; border-radius: 16px; display: grid; place-items: center;
-    background: linear-gradient(135deg, #006CE4, #3b82f6); color: white; font-weight: 900;
-}
-.ven-chip {
-    padding: 6px 14px; border-radius: 100px; font-size: 12px; font-weight: 700;
-    background: #f1f5f9; color: #475569; border: 1px solid #e2e8f0;
-}
-.ven-bar-bg { height: 8px; border-radius: 100px; background: #f1f5f9; margin-top: 8px; }
-.ven-bar-fill { height: 100%; border-radius: 100px; background: #006CE4; }
-.ven-btn {
-    background: #006CE4; color: white !important; border: none;
-    padding: 14px 28px; border-radius: 14px; font-weight: 800; cursor: pointer;
-    width: 100%; transition: opacity 0.2s;
-}
-.ven-btn:hover { opacity: 0.9; }
 """
 # ==========================================
-# 3. COMPONENT GENERATORS
 # ==========================================
-def format_recommendation_ui(res_name, rating, persona, score, review):
-    match_pct = int(score * 100)
-    # Safety check for review text
-    review_display = review[:160] + "..." if isinstance(review, str) else "Great place!"
     return f"""
     <div class="ven-card">
-        <div style="padding: 24px;">
-            <div style="display: flex; justify-content: space-between; align-items: flex-start;">
-                <div style="display: flex; gap: 16px;">
-                    <div class="ven-badge">✨</div>
-                    <div>
-                        <h2 style="margin:0; font-size:22px; font-weight:900; color:#0f172a;">{res_name}</h2>
-                        <div style="margin-top:8px; display:flex; gap:8px;">
-                            <span class="ven-chip" style="background:#fff7ed; color:#c2410c; border-color:#fed7aa;">Top pick for {persona}</span>
-                        </div>
-                    </div>
-                </div>
-                <div style="text-align: right;">
-                    <div style="font-size:28px; font-weight:900; color:#006CE4;">{rating:.1f}</div>
-                    <div style="font-size:12px; font-weight:700; color:#94a3b8;">RATING</div>
-                </div>
             </div>
-            <div style="margin-top:24px;">
-                <div style="display:flex; justify-content:space-between; font-weight:800; font-size:14px;">
-                    <span>VEN Match Confidence</span>
-                    <span style="color:#006CE4;">{match_pct}%</span>
-                </div>
-                <div class="ven-bar-bg"><div class="ven-bar-fill" style="width:{match_pct}%"></div></div>
-            </div>
-            <div style="margin-top:24px; padding:16px; background:#f8fafc; border-radius:16px;">
-                <p style="margin:0; font-size:14px; line-height:1.6; color:#334155;">
-                    <b>Why it's a match:</b> Based on your context, this venue aligns with the preferences of our <b>{persona}</b> profile.
-                    Users said: "<i>{review_display}</i>"
-                </p>
-            </div>
-            <div style="margin-top:24px;">
-                <button class="ven-btn">Reserve with VEN Exclusive</button>
             </div>
         </div>
     </div>
     """
 # ==========================================
-# 4. LOGIC ENGINE
 # ==========================================
-def run_ven_engine(budget, dietary, company, purpose, noise):
-    # Construct descriptive bio
-    user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
-    # Semantic Search
-    query_vec = model.encode([user_context])
-    # Find closest persona
-    similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0]
-                    for p, v in persona_profiles.items()}
-    closest_persona = max(similarities, key=similarities.get)
-    # Filter data for that persona
-    persona_df = df[df['reviewer_persona'] == closest_persona]
-    # Safety: If no restaurants found for this persona, pick from the whole list
-    if persona_df.empty:
-        persona_df = df
-    # Get highest rated in that group
-    top_match = persona_df.sort_values(by='Rating', ascending=False).iloc[0]
-    return format_recommendation_ui(
-        top_match['Restaurant Name'],
-        top_match['Rating'],
-        closest_persona,
-        similarities[closest_persona],
-        top_match['Review']
-    )
-# ==========================================
-# 5. UI LAYOUT
-# ==========================================
-with gr.Blocks(css=ven_css, title="VEN — AI Matchmaker") as demo:
-    gr.HTML("<div style='text-align:center; padding: 40px 0;'><h1 style='font-size:36px; font-weight:950; color:#0f172a;'>VEN</h1><p style='color:#64748b; font-weight:600;'>Semantic Restaurant Discovery for Tel Aviv</p></div>")
     with gr.Row():
-        with gr.Column(scale=1):
-            with gr.Group():
-                gr.Markdown("### 🔍 Filter your Vibe")
-                in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
-                in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
-                in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
-                in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
-                in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
-                search_btn = gr.Button("Find My Table", variant="primary")
-        with gr.Column(scale=1.5):
-            gr.Markdown("### 🎯 Your Personal Match")
-            output_ui = gr.HTML("<div style='text-align:center; padding:100px; color:#cbd5e1; font-weight:600; border:2px dashed #e2e8f0; border-radius:24px;'>Adjust the filters to generate your AI recommendation</div>")
-    search_btn.click(
-        fn=run_ven_engine,
         inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
-        outputs=output_ui
     )
-# ==========================================
-# 6. LAUNCH
-# ==========================================
 if __name__ == "__main__":
     demo.launch()

 from sklearn.metrics.pairwise import cosine_similarity
 # ==========================================
+# 1. SETUP & DATA LOADING
 # ==========================================
 csv_path = "cleaned_dataset_10k.csv"
 pkl_path = "final_embeddings_10k.pkl"
 if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
+    raise FileNotFoundError("❌ Missing files! Please upload 'cleaned_dataset_10k.csv' and 'final_embeddings_10k.pkl'")
 # Load Data
 df = pd.read_csv(csv_path)
+# --- SAFETY FIX: Normalize Column Names ---
+# This ensures it works whether your CSV has "Restaurant Name" or "restaurant_name"
+df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
+# Check for required columns and fallback if missing
+def get_col(candidates, default):
+    """Return the first entry of `candidates` that is a column of `df`, else `default`."""
+    return next((name for name in candidates if name in df.columns), default)
+# Resolve the column names used by the rest of the app. Each lookup tries the
+# listed candidates in order against the normalized CSV header and falls back
+# to the last argument when none of them is present.
+col_name = get_col(['restaurant_name', 'name', 'place'], 'restaurant_name')
+col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
+col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
+col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
+# Load Embeddings
 with open(pkl_path, 'rb') as f:
+    # NOTE(review): unpickling can execute code embedded in the file; this is
+    # only acceptable if the .pkl always ships with the app - confirm it is
+    # never user-supplied.
     embedding_data = pickle.load(f)
     dataset_embeddings = embedding_data['embeddings']
+# Load Model
 model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
+# Calculate Persona Profiles
+# Maps each persona value to the mean of the embeddings of its rows.
 persona_profiles = {}
+if col_persona in df.columns:
+    for persona in df[col_persona].unique():
+        if pd.isna(persona): continue
+        indices = df[df[col_persona] == persona].index
+        # Row labels are used as positions into dataset_embeddings; labels at
+        # or beyond the number of embeddings are dropped.
+        # NOTE(review): assumes CSV row i corresponds to embedding i - confirm.
         valid_indices = [i for i in indices if i < len(dataset_embeddings)]
         if valid_indices:
             persona_vectors = dataset_embeddings[valid_indices]
             persona_profiles[persona] = np.mean(persona_vectors, axis=0)
 else:
+    # Fallback if no persona column exists
     persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
 # ==========================================
+# 2. UI DESIGN (VEN BRANDING)
 # ==========================================
 ven_css = """
+body { background: radial-gradient(1200px 600px at 20% 0%, #eef6ff 0%, #f8fafc 45%, #ffffff 100%) !important; font-family: sans-serif !important; }
+.ven-card { background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; box-shadow: 0 10px 30px -10px rgba(0,0,0,0.1); }
+.ven-header { font-size: 24px; font-weight: 800; color: #1e293b; margin-bottom: 5px; }
+.ven-sub { font-size: 14px; color: #64748b; font-weight: 600; margin-bottom: 20px; }
+.ven-score { font-size: 32px; font-weight: 900; color: #2563eb; }
+.ven-btn { background: #2563eb; color: white; border: none; font-weight: 700; border-radius: 12px; }
 """
 # ==========================================
+# 3. LOGIC ENGINE
 # ==========================================
+def run_ven_engine(budget, dietary, company, purpose, noise):
+    """Build the recommendation card (HTML) for the selected preferences.
+
+    The five selections are folded into one sentence, encoded with `model`,
+    and compared by cosine similarity against every vector in
+    `persona_profiles`. The best-scoring persona selects the candidate rows
+    of `df`, and the highest-rated candidate is rendered.
+
+    Args:
+        budget, dietary, company, purpose, noise: the values chosen in the
+            five input components; they are only interpolated into the query.
+
+    Returns:
+        An HTML string for the `gr.HTML` output component.
+    """
+    import html  # local import: only needed to escape dataset text below
+
+    # 1. Create a search query from the dropdowns
+    user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
+    # 2. Encode the query
+    query_vec = model.encode([user_context])
+    # 3. Find the closest Persona (Cluster)
+    similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
+    closest_persona = max(similarities, key=similarities.get)
+    # 4. Filter data for that persona; use the whole dataset when there is no
+    #    persona column or no row carries the chosen persona
+    persona_df = df
+    if col_persona in df.columns:
+        matched = df[df[col_persona] == closest_persona]
+        if not matched.empty:
+            persona_df = matched
+    # 5. Get the highest rated restaurant in that group
+    top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
+    # 6. Format the output HTML
+    # Cosine similarity may be negative; clamp so the percentage stays in 0..100
+    match_pct = max(0, min(100, int(similarities[closest_persona] * 100)))
+    raw_review = top_match[col_review]
+    if pd.isna(raw_review):
+        # A missing review would otherwise be rendered as the text "nan..."
+        review_text = "No review available."
+    else:
+        review_text = str(raw_review)
+        if len(review_text) > 180:
+            # Only mark the text as cut when it actually was
+            review_text = review_text[:180] + "..."
+    # Everything below comes from the dataset, so escape it before embedding
+    # it in markup
+    name_html = html.escape(str(top_match[col_name]))
+    persona_html = html.escape(str(closest_persona))
+    rating_html = html.escape(str(top_match[col_rating]))
+    review_html = html.escape(review_text)
     return f"""
     <div class="ven-card">
+        <div style="display:flex; justify-content:space-between;">
+            <div>
+                <div class="ven-header">{name_html}</div>
+                <div class="ven-sub">Top Match for {persona_html}</div>
             </div>
+            <div style="text-align:right;">
+                <div class="ven-score">{rating_html}</div>
+                <div style="font-size:12px; font-weight:bold; color:#94a3b8;">RATING</div>
             </div>
         </div>
+        <hr style="border:0; border-top:1px solid #f1f5f9; margin: 15px 0;">
+        <p style="color:#334155; line-height:1.6;"><i>"{review_html}"</i></p>
+        <div style="margin-top:15px; font-size:13px; font-weight:700; color:#2563eb;">Match Confidence: {match_pct}%</div>
     </div>
     """
 # ==========================================
+# 4. APP LAYOUT
 # ==========================================
+# Two-column layout: preference inputs on the left, the recommendation card
+# on the right, followed by one-click example rows.
+with gr.Blocks(css=ven_css, title="VEN Project") as demo:
+    gr.Markdown("# 🍔 VEN: Restaurant Matchmaker")
+    gr.Markdown("Select your vibe below to get a personalized recommendation.")
     with gr.Row():
+        with gr.Column():
+            in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
+            in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
+            in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
+            # "Professional meeting" has to be a listed choice: the third
+            # quick-starter example below selects it, and that example is
+            # executed at startup because cache_examples=True.
+            in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
+            in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
+            btn = gr.Button("Find My Table", variant="primary")
+        with gr.Column():
+            output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
+    # --- STEP 7: REQUIRED QUICK STARTERS ---
+    # Each row lists values in the same order as `inputs`; every value must be
+    # one of the choices of the corresponding component above.
+    gr.Markdown("### 🚀 One-Click Examples (Quick Starters)")
+    gr.Examples(
+        examples=[
+            ["Budget-friendly", "Vegetarian", "Friends", "Quick bite", "Moderate/Social"],
+            ["Premium", "Meat-lover", "Date/Couple", "Special occasion", "Quiet/Intimate"],
+            ["Mid-range", "Anything", "Business", "Professional meeting", "Quiet/Intimate"]
+        ],
         inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
+        outputs=output_ui,
+        fn=run_ven_engine,
+        cache_examples=True, # This pre-runs the examples so they are instant!
     )
+    btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
 if __name__ == "__main__":
     demo.launch()