MatanYehudaDataAnalyst committed on
Commit
60b74b8
·
verified ·
1 Parent(s): 3b3739d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -57
app.py CHANGED
@@ -13,15 +13,13 @@ csv_path = "cleaned_dataset_10k.csv"
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
- # If files are missing, the app will show an error on startup
17
- raise FileNotFoundError(f"Error: Required files not found in root directory.")
18
 
19
  # Load Data
20
  df = pd.read_csv(csv_path)
21
- # Standardize column names (lowercase, no spaces)
22
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
23
 
24
- # Helper to identify correct column names automatically
25
  def get_col(candidates, default):
26
  for c in candidates:
27
  if c in df.columns: return c
@@ -32,86 +30,87 @@ col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
32
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
33
  col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
34
 
35
- # Load Pre-computed Embeddings
36
  with open(pkl_path, 'rb') as f:
37
  embedding_data = pickle.load(f)
38
- dataset_embeddings = embedding_data['embeddings'] if isinstance(embedding_data, dict) else embedding_data
 
 
 
39
 
40
- # Load Semantic Model
41
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
42
 
43
- # Calculate Persona Taste Profiles (Mean Vectors)
44
  persona_profiles = {}
45
  if col_persona in df.columns:
46
  for persona in df[col_persona].unique():
47
  if pd.isna(persona): continue
48
  indices = df[df[col_persona] == persona].index
49
- # Ensure indices are within embedding bounds
50
  valid_indices = [i for i in indices if i < len(dataset_embeddings)]
51
  if valid_indices:
52
- persona_profiles[persona] = np.mean(dataset_embeddings[valid_indices], axis=0)
 
53
  else:
54
- # Fallback if no persona column exists
55
  persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
56
 
57
  # ==========================================
58
- # 2. RECOMMENDATION LOGIC
59
  # ==========================================
60
  def run_ven_engine(budget, dietary, company, purpose, noise):
61
- # Construct user context string for embedding
62
  user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
63
  query_vec = model.encode([user_context])
64
 
65
- # Semantic similarity check against persona profiles
66
  similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
67
  closest_persona = max(similarities, key=similarities.get)
68
 
69
- # Filter dataset for the matched persona
70
- persona_df = df[df[col_persona] == closest_persona] if col_persona in df.columns else df
71
- if persona_df.empty: persona_df = df
 
 
72
 
73
- # Selection Strategy: Highest rated restaurant for that persona
74
  top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
 
75
  match_pct = int(similarities[closest_persona] * 100)
76
- review_text = str(top_match[col_review])[:180] + "..."
77
 
78
- # Return HTML Card for display
79
  return f"""
80
- <div style="background: white; border-radius: 15px; padding: 20px; color: #1e293b !important; border-left: 8px solid #f97316; box-shadow: 0 10px 15px rgba(0,0,0,0.1);">
81
- <div style="display:flex; justify-content:space-between; align-items: flex-start;">
82
  <div>
83
- <div style="font-size: 24px; font-weight: 800; color: #0f172a !important; margin-bottom: 2px;">{top_match[col_name]}</div>
84
- <div style="font-size: 14px; color: #64748b !important; font-weight: 600;">AI Match: {closest_persona} profile</div>
85
  </div>
86
- <div style="text-align:right; background: #f1f5f9; padding: 10px; border-radius: 10px;">
87
- <div style="font-size: 26px; font-weight: 900; color: #f97316 !important;">{top_match[col_rating]}</div>
88
- <div style="font-size:10px; font-weight:800; color: #475569 !important; letter-spacing: 1px;">RATING</div>
89
  </div>
90
  </div>
91
- <div style="margin: 15px 0; font-size: 15px; font-style: italic; color: #334155 !important; line-height: 1.5;">
92
- "{review_text}"
93
- </div>
94
- <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 10px;">
95
- <span style="font-size: 12px; font-weight: 700; color: #f97316;">Match Confidence: {match_pct}%</span>
96
- <span style="font-size: 11px; background: #0f172a; color: white; padding: 3px 8px; border-radius: 5px;">VEN Matchmaker</span>
97
- </div>
98
  </div>
99
  """
100
 
101
  # ==========================================
102
- # 3. UI & CSS (OPTIMIZED FOR VISIBILITY)
103
  # ==========================================
104
- # Specific CSS to fix visibility of labels and radio buttons in dark mode
105
  ven_css = """
106
- .gradio-container { background-color: #0f172a !important; }
107
- /* Force labels above inputs to be white and visible */
108
- label span { color: white !important; font-weight: 600 !important; font-size: 14px !important; }
109
- /* Force radio button choice text to be white */
110
- .gr-radio label span { color: white !important; font-size: 13px !important; }
111
- /* Style the primary orange button */
112
- .ven-button { background-color: #f97316 !important; color: white !important; border: none !important; font-weight: 800 !important; }
113
- /* Ensure headings are white */
114
- h1 { color: white !important; text-align: center; }
115
  """
116
 
117
  with gr.Blocks(css=ven_css, title="VEN Project") as demo:
@@ -119,17 +118,16 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
119
 
120
  with gr.Row():
121
  with gr.Column():
122
- with gr.Group(): # Group keeps labels and inputs contained
123
- in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="1. Select Budget", value="Mid-range")
124
- in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="2. Dietary Preference", value="Anything")
125
- in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="3. Who are you with?", value="Date/Couple")
126
- in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="4. Occasion", value="Casual dinner")
127
- in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="5. Environment vibe", value="Moderate/Social")
128
- btn = gr.Button("Find My Table", variant="primary", elem_classes="ven-button")
129
-
130
  with gr.Column():
131
- output_ui = gr.HTML("<div style='text-align:center; padding:50px; color:#94a3b8;'>Your personalized recommendation will appear here...</div>")
132
-
133
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
134
  gr.Examples(
135
  examples=[
@@ -140,9 +138,9 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
140
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
141
  outputs=output_ui,
142
  fn=run_ven_engine,
143
- cache_examples=False, # Disable cache if you want real-time testing
144
  )
145
-
146
  btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
147
 
148
  if __name__ == "__main__":
 
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
+ raise FileNotFoundError(f"Error: Files not found. I see: {os.listdir('.')}")
 
17
 
18
  # Load Data
19
  df = pd.read_csv(csv_path)
 
20
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
21
 
22
+ # Helper to find column names
23
  def get_col(candidates, default):
24
  for c in candidates:
25
  if c in df.columns: return c
 
30
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
31
  col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
32
 
33
# Load the pre-computed embeddings.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only ship a pickle that was produced by this project.
with open(pkl_path, 'rb') as f:
    embedding_data = pickle.load(f)

# The pickle may hold either the embeddings themselves or a dict wrapping them.
if isinstance(embedding_data, dict):
    if 'embeddings' not in embedding_data:
        # Fail here with a clear message instead of binding the dict itself and
        # crashing later in an unrelated indexing / mean computation.
        raise KeyError(
            f"'embeddings' key not found in {pkl_path}; "
            f"available keys: {list(embedding_data)}"
        )
    dataset_embeddings = embedding_data['embeddings']
else:
    dataset_embeddings = embedding_data
40
 
41
+ # Load Model
42
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
43
 
44
# Calculate persona profiles: one mean embedding vector per persona value.
persona_profiles = {}

# Normalise to an ndarray so the list-based indexing below also works when the
# pickle stored a plain Python list of vectors.
_embedding_matrix = np.asarray(dataset_embeddings)

if col_persona in df.columns:
    for persona in df[col_persona].unique():
        if pd.isna(persona):
            continue
        indices = df[df[col_persona] == persona].index
        # Row labels are used as positions into the embedding matrix; drop any
        # label that falls outside it.
        valid_indices = [i for i in indices if i < len(_embedding_matrix)]
        if valid_indices:
            persona_profiles[persona] = np.mean(_embedding_matrix[valid_indices], axis=0)

# Fallback when there is no persona column, or when the column produced no
# usable profile (all NaN / all out of range). Without at least one entry the
# max() over similarities in run_ven_engine would raise on every request.
if not persona_profiles:
    persona_profiles['Default'] = np.mean(_embedding_matrix, axis=0)
56
 
57
  # ==========================================
58
+ # 2. LOGIC ENGINE
59
  # ==========================================
60
def run_ven_engine(budget, dietary, company, purpose, noise):
    """Pick a restaurant for the given preferences and return it as an HTML card.

    The five answers are combined into one sentence, encoded with the
    module-level sentence-transformer ``model`` and compared by cosine
    similarity against every vector in ``persona_profiles``. From the rows of
    the closest persona (or the whole dataset when that subset is empty or no
    persona column exists) the highest-rated row is rendered.

    Args:
        budget: Selected budget option, interpolated into the query sentence.
        dietary: Selected dietary option.
        company: Selected company option.
        purpose: Selected occasion option.
        noise: Selected environment option.

    Returns:
        str: HTML fragment describing the recommended row.
    """
    # Local import so the block is self-contained; values read from the CSV
    # must be escaped before being placed into markup.
    from html import escape

    user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
    query_vec = model.encode([user_context])

    similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
    closest_persona = max(similarities, key=similarities.get)

    if col_persona in df.columns:
        persona_df = df[df[col_persona] == closest_persona]
        if persona_df.empty:
            # e.g. the 'Default' profile, which matches no row value
            persona_df = df
    else:
        persona_df = df

    top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]

    match_pct = int(similarities[closest_persona] * 100)

    # Truncate long reviews; only add the ellipsis when text was actually cut.
    full_review = str(top_match[col_review])
    review_text = full_review[:160]
    if len(full_review) > 160:
        review_text += "..."

    # Escape everything that originates from the dataset so stray '<', '&' or
    # quotes cannot break the card or inject markup.
    name_html = escape(str(top_match[col_name]))
    persona_html = escape(str(closest_persona))
    rating_html = escape(str(top_match[col_rating]))
    review_html = escape(review_text)

    # --- VISUAL FIX ---
    return f"""
    <div style="background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; color: #000000 !important;">
        <div style="display:flex; justify-content:space-between;">
            <div>
                <div style="font-size: 22px; font-weight: 800; color: #000000 !important;">{name_html}</div>
                <div style="font-size: 14px; color: #333333 !important; font-weight: 600;">Match for: {persona_html}</div>
            </div>
            <div style="text-align:right;">
                <div style="font-size: 28px; font-weight: 900; color: #2563eb !important;">{rating_html}</div>
                <div style="font-size:12px; font-weight:bold; color: #000000 !important;">RATING</div>
            </div>
        </div>
        <hr style="border:0; border-top:1px solid #cbd5e1; margin: 15px 0;">

        <p style="color: #000000 !important; line-height:1.6; font-size: 16px; font-weight: 500; margin-top: 10px;">
            <i style="color: #000000 !important;">"{review_html}"</i>
        </p>

        <div style="margin-top:15px; font-size:13px; font-weight:700; color:#2563eb !important;">Match Confidence: {match_pct}%</div>
    </div>
    """
100
 
101
  # ==========================================
102
+ # 3. APP UI & CSS FIX
103
  # ==========================================
 
104
  ven_css = """
105
+ body { background-color: #0f172a !important; font-family: sans-serif !important; }
106
+ /* Global White Text for Dark Mode */
107
+ h1, h2, h3, h4, h5, h6 { color: white !important; }
108
+ p, span, div, label { color: white; }
109
+
110
+ /* Override: Force Black Text inside the Results Card */
111
+ .gradio-html div { color: #000000 !important; }
112
+ .gradio-html p { color: #000000 !important; }
113
+ .gradio-html i { color: #000000 !important; }
114
  """
115
 
116
  with gr.Blocks(css=ven_css, title="VEN Project") as demo:
 
118
 
119
  with gr.Row():
120
  with gr.Column():
121
+ in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
122
+ in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
123
+ in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
124
+ in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
125
+ in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
126
+ btn = gr.Button("Find My Table", variant="primary")
127
+
 
128
  with gr.Column():
129
+ output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
130
+
131
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
132
  gr.Examples(
133
  examples=[
 
138
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
139
  outputs=output_ui,
140
  fn=run_ven_engine,
141
+ cache_examples=True,
142
  )
143
+
144
  btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
145
 
146
  if __name__ == "__main__":