Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,34 +1,43 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
-
import pickle
|
| 4 |
import numpy as np
|
| 5 |
import os
|
| 6 |
import random
|
| 7 |
import base64
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
| 10 |
from IO_pipeline import RecipeDigitalizerPipeline
|
| 11 |
|
| 12 |
# ==========================================
|
| 13 |
-
# 1. SETUP & DATA LOADING
|
| 14 |
# ==========================================
|
| 15 |
hf_token = os.getenv("HF_TOKEN")
|
| 16 |
API_MODEL = "BAAI/bge-small-en-v1.5"
|
| 17 |
client = InferenceClient(token=hf_token) if hf_token else None
|
| 18 |
|
| 19 |
-
print("⏳ Loading Data...")
|
| 20 |
try:
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
print(f"❌ Error loading data: {e}")
|
| 34 |
df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})
|
|
@@ -99,7 +108,7 @@ def find_similar_recipes_list(query_text):
|
|
| 99 |
for idx in top_indices:
|
| 100 |
score = scores[idx]
|
| 101 |
row = df_recipes.iloc[idx]
|
| 102 |
-
title = row
|
| 103 |
score_display = f"{score:.3%}"
|
| 104 |
|
| 105 |
# Build the content block
|
|
@@ -344,7 +353,7 @@ button.gallery-item img {
|
|
| 344 |
|
| 345 |
button.gallery-item:hover {
|
| 346 |
transform: scale(2.5) !important;
|
| 347 |
-
z-index: 1000 !important;
|
| 348 |
box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
|
| 349 |
border: 2px solid white !important;
|
| 350 |
border-radius: 8px !important;
|
|
@@ -446,6 +455,10 @@ with gr.Blocks(title="CookBook AI") as demo:
|
|
| 446 |
time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
|
| 447 |
post_time = random.choice(time_options)
|
| 448 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
with gr.Group(elem_classes=["content-card"]):
|
| 450 |
gr.HTML(f"""
|
| 451 |
<div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;">
|
|
@@ -453,8 +466,8 @@ with gr.Blocks(title="CookBook AI") as demo:
|
|
| 453 |
<div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} · 🌍 Public</span></div>
|
| 454 |
</div>
|
| 455 |
""")
|
| 456 |
-
gr.Markdown(f"### {
|
| 457 |
-
gr.Markdown(f"{
|
| 458 |
with gr.Row():
|
| 459 |
gr.Button("👍 Like", size="sm", variant="secondary")
|
| 460 |
gr.Button("💬 Comment", size="sm", variant="secondary")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import os
|
| 5 |
import random
|
| 6 |
import base64
|
| 7 |
from huggingface_hub import InferenceClient
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
from datasets import load_dataset # Added for HF Dataset loading
|
| 10 |
from IO_pipeline import RecipeDigitalizerPipeline
|
| 11 |
|
| 12 |
# ==========================================
|
| 13 |
+
# 1. SETUP & DATA LOADING (UPDATED)
|
| 14 |
# ==========================================
|
| 15 |
hf_token = os.getenv("HF_TOKEN")
|
| 16 |
API_MODEL = "BAAI/bge-small-en-v1.5"
|
| 17 |
client = InferenceClient(token=hf_token) if hf_token else None
|
| 18 |
|
| 19 |
+
print("⏳ Loading Data from Hugging Face...")
|
| 20 |
try:
|
| 21 |
+
# Load dataset from Hugging Face
|
| 22 |
+
# We load the 'train' split by default.
|
| 23 |
+
dataset = load_dataset("Liori25/10k_recipes", split="train")
|
| 24 |
+
|
| 25 |
+
# Convert to Pandas DataFrame
|
| 26 |
+
df_recipes = dataset.to_pandas()
|
| 27 |
+
|
| 28 |
+
# Extract Embeddings
|
| 29 |
+
# We look for common names for the embedding column
|
| 30 |
+
target_col = next((c for c in ['embedding', 'embeddings', 'vectors'] if c in df_recipes.columns), None)
|
| 31 |
+
|
| 32 |
+
if target_col:
|
| 33 |
+
# Convert the column of lists into a 2D numpy array
|
| 34 |
+
# This handles the conversion from the HF list format to the numpy matrix required for cosine_similarity
|
| 35 |
+
stored_embeddings = np.vstack(df_recipes[target_col].values)
|
| 36 |
+
print(f"✅ Data Loaded from HF! Shape: {stored_embeddings.shape}")
|
| 37 |
+
else:
|
| 38 |
+
print("⚠️ No embedding column found in dataset.")
|
| 39 |
+
stored_embeddings = None
|
| 40 |
+
|
| 41 |
except Exception as e:
|
| 42 |
print(f"❌ Error loading data: {e}")
|
| 43 |
df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})
|
|
|
|
| 108 |
for idx in top_indices:
|
| 109 |
score = scores[idx]
|
| 110 |
row = df_recipes.iloc[idx]
|
| 111 |
+
title = row.get('Title', 'Unknown Recipe')
|
| 112 |
score_display = f"{score:.3%}"
|
| 113 |
|
| 114 |
# Build the content block
|
|
|
|
| 353 |
|
| 354 |
button.gallery-item:hover {
|
| 355 |
transform: scale(2.5) !important;
|
| 356 |
+
z-index: 1000 !important;
|
| 357 |
box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
|
| 358 |
border: 2px solid white !important;
|
| 359 |
border-radius: 8px !important;
|
|
|
|
| 455 |
time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
|
| 456 |
post_time = random.choice(time_options)
|
| 457 |
|
| 458 |
+
# Fallback for feed display
|
| 459 |
+
raw_desc = str(row.get('Raw_Output', 'Delicious recipe...'))[:250]
|
| 460 |
+
title_feed = row.get('Title', 'Recipe')
|
| 461 |
+
|
| 462 |
with gr.Group(elem_classes=["content-card"]):
|
| 463 |
gr.HTML(f"""
|
| 464 |
<div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;">
|
|
|
|
| 466 |
<div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} · 🌍 Public</span></div>
|
| 467 |
</div>
|
| 468 |
""")
|
| 469 |
+
gr.Markdown(f"### {title_feed}")
|
| 470 |
+
gr.Markdown(f"{raw_desc}...")
|
| 471 |
with gr.Row():
|
| 472 |
gr.Button("👍 Like", size="sm", variant="secondary")
|
| 473 |
gr.Button("💬 Comment", size="sm", variant="secondary")
|