Liori25 committed on
Commit
5399716
·
verified ·
1 Parent(s): f8e6422

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -18
app.py CHANGED
@@ -1,34 +1,43 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import pickle
4
  import numpy as np
5
  import os
6
  import random
7
  import base64
8
  from huggingface_hub import InferenceClient
9
  from sklearn.metrics.pairwise import cosine_similarity
 
10
  from IO_pipeline import RecipeDigitalizerPipeline
11
 
12
  # ==========================================
13
- # 1. SETUP & DATA LOADING
14
  # ==========================================
15
  hf_token = os.getenv("HF_TOKEN")
16
  API_MODEL = "BAAI/bge-small-en-v1.5"
17
  client = InferenceClient(token=hf_token) if hf_token else None
18
 
19
- print("⏳ Loading Data...")
20
  try:
21
- df_recipes = pd.read_csv('RecipeData_10K.csv')
22
- with open('recipe_embeddings.pkl', 'rb') as f:
23
- data = pickle.load(f)
24
- if isinstance(data, dict):
25
- stored_embeddings = np.array(data['embeddings'])
26
- elif isinstance(data, pd.DataFrame):
27
- target_col = next((c for c in ['embedding', 'embeddings', 'vectors'] if c in data.columns), None)
28
- stored_embeddings = np.vstack(data[target_col].values) if target_col else data
29
- else:
30
- stored_embeddings = data
31
- print("βœ… Data Loaded!")
 
 
 
 
 
 
 
 
 
32
  except Exception as e:
33
  print(f"❌ Error loading data: {e}")
34
  df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})
@@ -99,7 +108,7 @@ def find_similar_recipes_list(query_text):
99
  for idx in top_indices:
100
  score = scores[idx]
101
  row = df_recipes.iloc[idx]
102
- title = row['Title']
103
  score_display = f"{score:.3%}"
104
 
105
  # Build the content block
@@ -344,7 +353,7 @@ button.gallery-item img {
344
 
345
  button.gallery-item:hover {
346
  transform: scale(2.5) !important;
347
- z-index: 1000 !important;
348
  box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
349
  border: 2px solid white !important;
350
  border-radius: 8px !important;
@@ -446,6 +455,10 @@ with gr.Blocks(title="CookBook AI") as demo:
446
  time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
447
  post_time = random.choice(time_options)
448
 
 
 
 
 
449
  with gr.Group(elem_classes=["content-card"]):
450
  gr.HTML(f"""
451
  <div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;">
@@ -453,8 +466,8 @@ with gr.Blocks(title="CookBook AI") as demo:
453
  <div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} Β· 🌍 Public</span></div>
454
  </div>
455
  """)
456
- gr.Markdown(f"### {row['Title']}")
457
- gr.Markdown(f"{str(row['Raw_Output'])[:250]}...")
458
  with gr.Row():
459
  gr.Button("πŸ‘ Like", size="sm", variant="secondary")
460
  gr.Button("πŸ’¬ Comment", size="sm", variant="secondary")
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  import numpy as np
4
  import os
5
  import random
6
  import base64
7
  from huggingface_hub import InferenceClient
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ from datasets import load_dataset # Added for HF Dataset loading
10
  from IO_pipeline import RecipeDigitalizerPipeline
11
 
12
  # ==========================================
13
+ # 1. SETUP & DATA LOADING (UPDATED)
14
  # ==========================================
15
  hf_token = os.getenv("HF_TOKEN")
16
  API_MODEL = "BAAI/bge-small-en-v1.5"
17
  client = InferenceClient(token=hf_token) if hf_token else None
18
 
19
+ print("⏳ Loading Data from Hugging Face...")
20
  try:
21
+ # Load dataset from Hugging Face
22
+ # We load the 'train' split by default.
23
+ dataset = load_dataset("Liori25/10k_recipes", split="train")
24
+
25
+ # Convert to Pandas DataFrame
26
+ df_recipes = dataset.to_pandas()
27
+
28
+ # Extract Embeddings
29
+ # We look for common names for the embedding column
30
+ target_col = next((c for c in ['embedding', 'embeddings', 'vectors'] if c in df_recipes.columns), None)
31
+
32
+ if target_col:
33
+ # Convert the column of lists into a 2D numpy array
34
+ # This handles the conversion from the HF list format to the numpy matrix required for cosine_similarity
35
+ stored_embeddings = np.vstack(df_recipes[target_col].values)
36
+ print(f"βœ… Data Loaded from HF! Shape: {stored_embeddings.shape}")
37
+ else:
38
+ print("⚠️ No embedding column found in dataset.")
39
+ stored_embeddings = None
40
+
41
  except Exception as e:
42
  print(f"❌ Error loading data: {e}")
43
  df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})
 
108
  for idx in top_indices:
109
  score = scores[idx]
110
  row = df_recipes.iloc[idx]
111
+ title = row.get('Title', 'Unknown Recipe')
112
  score_display = f"{score:.3%}"
113
 
114
  # Build the content block
 
353
 
354
  button.gallery-item:hover {
355
  transform: scale(2.5) !important;
356
+ z-index: 1000 !important;
357
  box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
358
  border: 2px solid white !important;
359
  border-radius: 8px !important;
 
455
  time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
456
  post_time = random.choice(time_options)
457
 
458
+ # Fallback for feed display
459
+ raw_desc = str(row.get('Raw_Output', 'Delicious recipe...'))[:250]
460
+ title_feed = row.get('Title', 'Recipe')
461
+
462
  with gr.Group(elem_classes=["content-card"]):
463
  gr.HTML(f"""
464
  <div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;">
 
466
  <div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} Β· 🌍 Public</span></div>
467
  </div>
468
  """)
469
+ gr.Markdown(f"### {title_feed}")
470
+ gr.Markdown(f"{raw_desc}...")
471
  with gr.Row():
472
  gr.Button("πŸ‘ Like", size="sm", variant="secondary")
473
  gr.Button("πŸ’¬ Comment", size="sm", variant="secondary")