MatanYehudaDataAnalyst committed on
Commit
3b3739d
·
verified ·
1 Parent(s): 16281b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -64
app.py CHANGED
@@ -13,13 +13,15 @@ csv_path = "cleaned_dataset_10k.csv"
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
- raise FileNotFoundError(f"Error: Files not found. I see: {os.listdir('.')}")
 
17
 
18
  # Load Data
19
  df = pd.read_csv(csv_path)
 
20
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
21
 
22
- # Helper to find column names
23
  def get_col(candidates, default):
24
  for c in candidates:
25
  if c in df.columns: return c
@@ -30,96 +32,86 @@ col_rating = get_col(['rating', 'rating_score', 'stars'], 'rating')
30
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
31
  col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
32
 
33
- # Load Embeddings
34
  with open(pkl_path, 'rb') as f:
35
  embedding_data = pickle.load(f)
36
- if isinstance(embedding_data, dict) and 'embeddings' in embedding_data:
37
- dataset_embeddings = embedding_data['embeddings']
38
- else:
39
- dataset_embeddings = embedding_data
40
 
41
- # Load Model
42
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
43
 
44
- # Calculate Personas
45
  persona_profiles = {}
46
  if col_persona in df.columns:
47
  for persona in df[col_persona].unique():
48
  if pd.isna(persona): continue
49
  indices = df[df[col_persona] == persona].index
 
50
  valid_indices = [i for i in indices if i < len(dataset_embeddings)]
51
  if valid_indices:
52
- persona_vectors = dataset_embeddings[valid_indices]
53
- persona_profiles[persona] = np.mean(persona_vectors, axis=0)
54
  else:
 
55
  persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
56
 
57
  # ==========================================
58
- # 2. LOGIC ENGINE
59
  # ==========================================
60
  def run_ven_engine(budget, dietary, company, purpose, noise):
 
61
  user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
62
  query_vec = model.encode([user_context])
63
 
 
64
  similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
65
  closest_persona = max(similarities, key=similarities.get)
66
 
67
- if col_persona in df.columns:
68
- persona_df = df[df[col_persona] == closest_persona]
69
- if persona_df.empty: persona_df = df
70
- else:
71
- persona_df = df
72
 
 
73
  top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
74
-
75
  match_pct = int(similarities[closest_persona] * 100)
76
- review_text = str(top_match[col_review])[:160] + "..."
77
 
 
78
  return f"""
79
- <div style="background: white; border: 1px solid #e2e8f0; border-radius: 20px; padding: 24px; color: #000000 !important;">
80
- <div style="display:flex; justify-content:space-between;">
81
  <div>
82
- <div style="font-size: 22px; font-weight: 800; color: #000000 !important;">{top_match[col_name]}</div>
83
- <div style="font-size: 14px; color: #333333 !important; font-weight: 600;">Match for: {closest_persona}</div>
84
  </div>
85
- <div style="text-align:right;">
86
- <div style="font-size: 28px; font-weight: 900; color: #2563eb !important;">{top_match[col_rating]}</div>
87
- <div style="font-size:12px; font-weight:bold; color: #000000 !important;">RATING</div>
88
  </div>
89
  </div>
90
- <hr style="border:0; border-top:1px solid #cbd5e1; margin: 15px 0;">
91
- <p style="color: #000000 !important; line-height:1.6; font-size: 16px; font-weight: 500; margin-top: 10px;">
92
- <i style="color: #000000 !important;">"{review_text}"</i>
93
- </p>
94
- <div style="margin-top:15px; font-size:13px; font-weight:700; color:#2563eb !important;">Match Confidence: {match_pct}%</div>
 
 
95
  </div>
96
  """
97
 
98
  # ==========================================
99
- # 3. APP UI & CSS FIX (THE NUCLEAR FIX)
100
  # ==========================================
 
101
  ven_css = """
102
- /* מכריח את כל הקונטיינרים להיות כהים, גם במצב Light Mode */
103
- .gradio-container, .gradio-container.light, .gradio-container.dark {
104
- background-color: #0f172a !important;
105
- color: white !important;
106
- }
107
-
108
- /* מוודא שהרקע תמיד כהה */
109
- body, .landing, .wrap, .interface-box {
110
- background-color: #0f172a !important;
111
- color: white !important;
112
- }
113
-
114
- /* טקסטים לבנים לכל הכותרות והתוויות */
115
- h1, h2, h3, h4, h5, h6, label, span, p {
116
- color: white !important;
117
- }
118
-
119
- /* החריג היחיד: כרטיס התוצאה שלנו - שם אנחנו מכריחים שחור */
120
- .gradio-html div, .gradio-html p, .gradio-html i, .gradio-html span, .gradio-html h1, .gradio-html h2 {
121
- color: #000000 !important;
122
- }
123
  """
124
 
125
  with gr.Blocks(css=ven_css, title="VEN Project") as demo:
@@ -127,16 +119,17 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
127
 
128
  with gr.Row():
129
  with gr.Column():
130
- in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="Budget", value="Mid-range")
131
- in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="Diet", value="Anything")
132
- in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="With who?", value="Date/Couple")
133
- in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="Occasion", value="Casual dinner")
134
- in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="Environment", value="Moderate/Social")
135
- btn = gr.Button("Find My Table", variant="primary")
136
-
 
137
  with gr.Column():
138
- output_ui = gr.HTML("<h4>Recommendation will appear here...</h4>")
139
-
140
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
141
  gr.Examples(
142
  examples=[
@@ -147,9 +140,9 @@ with gr.Blocks(css=ven_css, title="VEN Project") as demo:
147
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
148
  outputs=output_ui,
149
  fn=run_ven_engine,
150
- cache_examples=True,
151
  )
152
-
153
  btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
154
 
155
  if __name__ == "__main__":
 
13
  pkl_path = "final_embeddings_10k.pkl"
14
 
15
  if not os.path.exists(csv_path) or not os.path.exists(pkl_path):
16
+ # If files are missing, the app will show an error on startup
17
+ raise FileNotFoundError(f"Error: Required files not found in root directory.")
18
 
19
  # Load Data
20
  df = pd.read_csv(csv_path)
21
+ # Standardize column names (lowercase, no spaces)
22
  df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
23
 
24
+ # Helper to identify correct column names automatically
25
  def get_col(candidates, default):
26
  for c in candidates:
27
  if c in df.columns: return c
 
32
  col_review = get_col(['review', 'review_content', 'review_content_clean'], 'review')
33
  col_persona = get_col(['reviewer_persona', 'persona', 'type'], 'reviewer_persona')
34
 
35
+ # Load Pre-computed Embeddings
36
  with open(pkl_path, 'rb') as f:
37
  embedding_data = pickle.load(f)
38
+ dataset_embeddings = embedding_data['embeddings'] if isinstance(embedding_data, dict) else embedding_data
 
 
 
39
 
40
+ # Load Semantic Model
41
  model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
42
 
43
+ # Calculate Persona Taste Profiles (Mean Vectors)
44
  persona_profiles = {}
45
  if col_persona in df.columns:
46
  for persona in df[col_persona].unique():
47
  if pd.isna(persona): continue
48
  indices = df[df[col_persona] == persona].index
49
+ # Ensure indices are within embedding bounds
50
  valid_indices = [i for i in indices if i < len(dataset_embeddings)]
51
  if valid_indices:
52
+ persona_profiles[persona] = np.mean(dataset_embeddings[valid_indices], axis=0)
 
53
  else:
54
+ # Fallback if no persona column exists
55
  persona_profiles['Default'] = np.mean(dataset_embeddings, axis=0)
56
 
57
  # ==========================================
58
+ # 2. RECOMMENDATION LOGIC
59
  # ==========================================
60
  def run_ven_engine(budget, dietary, company, purpose, noise):
61
+ # Construct user context string for embedding
62
  user_context = f"Searching for a {budget} experience, {dietary} friendly. Group: {company}. Occasion: {purpose}. Atmosphere: {noise}."
63
  query_vec = model.encode([user_context])
64
 
65
+ # Semantic similarity check against persona profiles
66
  similarities = {p: cosine_similarity(query_vec, v.reshape(1, -1))[0][0] for p, v in persona_profiles.items()}
67
  closest_persona = max(similarities, key=similarities.get)
68
 
69
+ # Filter dataset for the matched persona
70
+ persona_df = df[df[col_persona] == closest_persona] if col_persona in df.columns else df
71
+ if persona_df.empty: persona_df = df
 
 
72
 
73
+ # Selection Strategy: Highest rated restaurant for that persona
74
  top_match = persona_df.sort_values(by=col_rating, ascending=False).iloc[0]
 
75
  match_pct = int(similarities[closest_persona] * 100)
76
+ review_text = str(top_match[col_review])[:180] + "..."
77
 
78
+ # Return HTML Card for display
79
  return f"""
80
+ <div style="background: white; border-radius: 15px; padding: 20px; color: #1e293b !important; border-left: 8px solid #f97316; box-shadow: 0 10px 15px rgba(0,0,0,0.1);">
81
+ <div style="display:flex; justify-content:space-between; align-items: flex-start;">
82
  <div>
83
+ <div style="font-size: 24px; font-weight: 800; color: #0f172a !important; margin-bottom: 2px;">{top_match[col_name]}</div>
84
+ <div style="font-size: 14px; color: #64748b !important; font-weight: 600;">AI Match: {closest_persona} profile</div>
85
  </div>
86
+ <div style="text-align:right; background: #f1f5f9; padding: 10px; border-radius: 10px;">
87
+ <div style="font-size: 26px; font-weight: 900; color: #f97316 !important;">{top_match[col_rating]}</div>
88
+ <div style="font-size:10px; font-weight:800; color: #475569 !important; letter-spacing: 1px;">RATING</div>
89
  </div>
90
  </div>
91
+ <div style="margin: 15px 0; font-size: 15px; font-style: italic; color: #334155 !important; line-height: 1.5;">
92
+ "{review_text}"
93
+ </div>
94
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 10px;">
95
+ <span style="font-size: 12px; font-weight: 700; color: #f97316;">Match Confidence: {match_pct}%</span>
96
+ <span style="font-size: 11px; background: #0f172a; color: white; padding: 3px 8px; border-radius: 5px;">VEN Matchmaker</span>
97
+ </div>
98
  </div>
99
  """
100
 
101
  # ==========================================
102
+ # 3. UI & CSS (OPTIMIZED FOR VISIBILITY)
103
  # ==========================================
104
+ # Specific CSS to fix visibility of labels and radio buttons in dark mode
105
  ven_css = """
106
+ .gradio-container { background-color: #0f172a !important; }
107
+ /* Force labels above inputs to be white and visible */
108
+ label span { color: white !important; font-weight: 600 !important; font-size: 14px !important; }
109
+ /* Force radio button choice text to be white */
110
+ .gr-radio label span { color: white !important; font-size: 13px !important; }
111
+ /* Style the primary orange button */
112
+ .ven-button { background-color: #f97316 !important; color: white !important; border: none !important; font-weight: 800 !important; }
113
+ /* Ensure headings are white */
114
+ h1 { color: white !important; text-align: center; }
 
 
 
 
 
 
 
 
 
 
 
 
115
  """
116
 
117
  with gr.Blocks(css=ven_css, title="VEN Project") as demo:
 
119
 
120
  with gr.Row():
121
  with gr.Column():
122
+ with gr.Group(): # Group keeps labels and inputs contained
123
+ in_budget = gr.Dropdown(["Budget-friendly", "Mid-range", "Premium"], label="1. Select Budget", value="Mid-range")
124
+ in_diet = gr.Dropdown(["Anything", "Vegetarian", "Vegan", "Meat-lover"], label="2. Dietary Preference", value="Anything")
125
+ in_company = gr.Radio(["Solo", "Date/Couple", "Friends", "Business"], label="3. Who are you with?", value="Date/Couple")
126
+ in_purpose = gr.Dropdown(["Casual dinner", "Special occasion", "Quick bite", "Professional meeting"], label="4. Occasion", value="Casual dinner")
127
+ in_noise = gr.Radio(["Quiet/Intimate", "Moderate/Social", "Lively/Music"], label="5. Environment vibe", value="Moderate/Social")
128
+ btn = gr.Button("Find My Table", variant="primary", elem_classes="ven-button")
129
+
130
  with gr.Column():
131
+ output_ui = gr.HTML("<div style='text-align:center; padding:50px; color:#94a3b8;'>Your personalized recommendation will appear here...</div>")
132
+
133
  gr.Markdown("### 🚀 Quick Starters (One-Click)")
134
  gr.Examples(
135
  examples=[
 
140
  inputs=[in_budget, in_diet, in_company, in_purpose, in_noise],
141
  outputs=output_ui,
142
  fn=run_ven_engine,
143
+ cache_examples=False, # Disable cache if you want real-time testing
144
  )
145
+
146
  btn.click(run_ven_engine, inputs=[in_budget, in_diet, in_company, in_purpose, in_noise], outputs=output_ui)
147
 
148
  if __name__ == "__main__":