# CookBookAI / app.py
# Liori25's picture
# Update app.py
# f145dfd verified
import gradio as gr
import pandas as pd
import pickle
import numpy as np
import os
import random
import base64
from huggingface_hub import InferenceClient
from sklearn.metrics.pairwise import cosine_similarity
from datasets import load_dataset
from IO_pipeline import RecipeDigitalizerPipeline
# ==========================================
# 1. SETUP & DATA LOADING (HYBRID)
# ==========================================
# Name of the embedding model requested from the Hugging Face Inference API.
API_MODEL = "BAAI/bge-small-en-v1.5"

# The access token comes from the environment; when it is absent no API
# client is created and `client` stays None.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    client = InferenceClient(token=hf_token)
else:
    client = None

print("โณ Initializing Data Loading...")
# --- A. Load Text Data from Hugging Face Dataset ---
# Best-effort download of the recipe table: any failure is reported and
# replaced by an empty frame that still has the 'Title' and 'Raw_Output'
# columns, so the rest of the module can keep running.
try:
    print(" ...Downloading recipes from HF Dataset (Liori25/10k_recipes)")
    dataset = load_dataset("Liori25/10k_recipes", split="train")
    df_recipes = dataset.to_pandas()
    recipe_count = len(df_recipes)
    print(f"โœ… Recipes Loaded! Count: {recipe_count}")
except Exception as e:
    print(f"โŒ Error loading HF Dataset: {e}")
    empty_columns = {'Title': [], 'Raw_Output': []}
    df_recipes = pd.DataFrame(empty_columns)
# --- B. Load Embeddings from Local File (Space Repo) ---
# Load the pre-computed recipe embeddings from 'recipe_embeddings.pkl' in the
# working directory. On any problem `stored_embeddings` is set to None and the
# similarity search reports the database as unavailable.
#
# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Only ever load a pickle that ships with the app itself, never a user upload.
try:
    print(" ...Loading embeddings from local 'recipe_embeddings.pkl'")
    if os.path.exists('recipe_embeddings.pkl'):
        with open('recipe_embeddings.pkl', 'rb') as f:
            data = pickle.load(f)
        # The pickle may hold a dict, a DataFrame, or a bare array-like.
        if isinstance(data, dict):
            stored_embeddings = np.array(data['embeddings'])
        elif isinstance(data, pd.DataFrame):
            target_col = next(
                (c for c in ['embedding', 'embeddings', 'vectors'] if c in data.columns),
                None,
            )
            # Without a recognised column the frame itself is used as the matrix.
            stored_embeddings = np.vstack(data[target_col].values) if target_col else data
        else:
            # A plain list/tuple has no `.shape`; converting here keeps the
            # log line below from raising and discarding valid embeddings.
            stored_embeddings = np.asarray(data)
        print(f"โœ… Embeddings Loaded! Shape: {stored_embeddings.shape}")
    else:
        print("โŒ 'recipe_embeddings.pkl' not found locally.")
        stored_embeddings = None
except Exception as e:
    print(f"โŒ Error loading pickle file: {e}")
    stored_embeddings = None
# --- C. Safety Check ---
# Warn (without aborting) when both sources loaded but their row counts
# differ. The lengths are only compared once both objects are known to exist.
if (
    stored_embeddings is not None
    and not df_recipes.empty
    and len(stored_embeddings) != len(df_recipes)
):
    print(f"โš ๏ธ WARNING: Row mismatch! Recipes: {len(df_recipes)}, Embeddings: {len(stored_embeddings)}")
# ==========================================
# 2. HELPER: IMAGE TO BASE64
# ==========================================
def image_to_base64(image_path):
    """Return the contents of *image_path* as a base64 text string.

    When the path does not exist, a fixed base64 placeholder string is
    returned instead of raising, so callers can always embed the result.
    """
    placeholder = "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
    if os.path.exists(image_path):
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        return base64.b64encode(raw_bytes).decode('utf-8')
    return placeholder
# Encode the three static images once at import time; the resulting strings
# are embedded into the page HTML as data URIs further down.
logo_b64, profile_b64, process_b64 = map(
    image_to_base64,
    ("cookbook logo.png", "chef.avif", "preview of process.jpg"),
)
# ==========================================
# 3. BACKEND LOGIC
# ==========================================
def get_embedding_via_api(text):
    """Request an embedding for *text* from the inference client.

    Returns the API response wrapped in a NumPy array.

    Raises:
        ValueError: if no client was configured (no HF_TOKEN available).
    """
    if client:
        api_response = client.feature_extraction(text, model=API_MODEL)
        return np.array(api_response)
    raise ValueError("HF_TOKEN missing")
def _clean_and_validate(raw_text):
    """Return a display-ready version of a recipe field, or None to hide it.

    The value is stringified and trimmed; the brackets and quotes of a
    stringified list (``"['a', 'b']"``) are stripped. None is returned for
    empty placeholders ('nan', 'none', 'null', '[]', '') and for any text
    that mentions "error", so broken fields never reach the page.
    """
    val = str(raw_text).strip()
    if val.startswith("[") and val.endswith("]"):
        val = val[1:-1].replace("'", "").replace('"', "").strip()
    val_lower = val.lower()
    if val_lower in ('nan', 'none', 'null', '[]', ''):
        return None
    # "error" also matches "parse error", so one substring test is enough.
    if "error" in val_lower:
        return None
    return val


def find_similar_recipes_list(query_text):
    """Return three Markdown/HTML cards for the recipes closest to *query_text*.

    The query is embedded through the API, compared against the stored
    embeddings with cosine similarity, and the three best-scoring rows are
    rendered. The list always has exactly three entries: missing data yields
    three identical error messages, and fewer than three matches are padded
    with empty strings.

    Raises:
        ValueError: propagated from the embedding call when no API client
            is configured.
    """
    if stored_embeddings is None:
        return ["Database error: Embeddings missing."] * 3
    if df_recipes.empty:
        return ["Database error: Recipes missing."] * 3

    query_vec = get_embedding_via_api(
        "Represent this recipe for retrieving similar dishes: " + query_text
    )
    # cosine_similarity needs a 2-D (samples, features) input.
    if query_vec.ndim == 1:
        query_vec = query_vec.reshape(1, -1)

    scores = cosine_similarity(query_vec, stored_embeddings)[0]

    # Rank only rows that actually have a recipe. If the embeddings file has
    # more rows than the recipe table, an unrestricted argsort could select an
    # index that `iloc` rejects with IndexError.
    usable_rows = min(len(scores), len(df_recipes))
    top_indices = scores[:usable_rows].argsort()[-3:][::-1]

    # Column names vary between dataset versions; match them by substring.
    cols = df_recipes.columns
    ing_col = next((c for c in cols if 'ingredient' in c.lower()), None)
    inst_col = next((c for c in cols if 'instruction' in c.lower()), None)

    results_list = []
    for idx in top_indices:
        score = scores[idx]
        row = df_recipes.iloc[idx]
        title = row.get('Title', 'Unknown Recipe')
        score_display = f"{score:.3%}"

        content_parts = []
        if ing_col:
            cleaned_ing = _clean_and_validate(row[ing_col])
            if cleaned_ing:
                content_parts.append(f"<b>๐Ÿ›’ INGREDIENTS:</b><br>{cleaned_ing}")
        if inst_col:
            cleaned_inst = _clean_and_validate(row[inst_col])
            if cleaned_inst:
                content_parts.append(f"<b>๐Ÿณ INSTRUCTIONS:</b><br>{cleaned_inst}")

        if content_parts:
            display_text = "<br><br>".join(content_parts)
        else:
            # Both fields were hidden or absent: fall back to the raw model
            # output, unless that too reports an error.
            raw_out = str(row.get('Raw_Output', 'No details available.'))
            if "error" in raw_out.lower():
                display_text = "<i>Details unavailable for this recipe.</i>"
            else:
                display_text = raw_out

        card_content = (
            f"### ๐Ÿ† {title}\n"
            f"<span style='color:#1877f2; font-weight:bold; font-size:14px;'>Match Score: {score_display}</span>\n\n"
            f"<div class='sim-scroll'>{display_text}</div>"
        )
        results_list.append(card_content)

    # The UI always wires three cards, so pad short result lists.
    while len(results_list) < 3:
        results_list.append("")
    return results_list
def _as_item_list(value):
    """Normalize a JSON field to a list of items.

    None becomes an empty list and a bare string becomes a one-item list
    (or an empty list when blank), so a string is never iterated character
    by character. Any other iterable is converted with ``list``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value.strip() else []
    return list(value)


def format_recipe(json_data):
    """Turn the digitizer's JSON result into display text and a search query.

    Args:
        json_data: dict with optional "title", "ingredients", "instructions"
            keys, or an "error" key describing a failure.

    Returns:
        A ``(readable_text, query_text)`` tuple. On failure the first element
        is an "Error: ..." message and the second is an empty string, which
        callers treat as "no query".
    """
    if not isinstance(json_data, dict):
        # Anything other than a dict cannot be formatted; report it instead
        # of failing on the membership test / .get() calls below.
        return f"Error: unexpected pipeline output ({type(json_data).__name__})", ""
    if "error" in json_data:
        return f"Error: {json_data['error']}", ""

    # `or` also covers keys that are present but null/empty.
    title = json_data.get("title") or "Unknown"
    ingredients = _as_item_list(json_data.get("ingredients"))
    instructions = _as_item_list(json_data.get("instructions"))

    ing = "\n".join(f"- {x}" for x in ingredients)
    inst = "\n".join(f"{i}. {x}" for i, x in enumerate(instructions, start=1))
    text = f"๐Ÿฝ๏ธ {title}\n\n๐Ÿ›’ INGREDIENTS:\n{ing}\n\n๐Ÿณ INSTRUCTIONS:\n{inst}"
    return text, f"{title} {ing} {inst}"
def ui_update_pipeline(image_path):
    """Digitize the recipe image at *image_path* and fetch similar recipes.

    Returns a 7-tuple: the readable recipe text, the first similar-recipe
    card, two component updates, the second card, one more component update,
    and the third card. On success the three updates set ``visible=True``;
    on any failure they are no-op updates and the first element carries an
    "Error: ..." message.
    """
    if not hf_token:
        return "Error: HF_TOKEN missing", "", gr.update(), gr.update(), "", gr.update(), ""

    try:
        # The token is written back into the environment before the
        # pipeline object is constructed.
        os.environ["HF_TOKEN"] = hf_token
        pipeline = RecipeDigitalizerPipeline()
        extracted = pipeline.run_pipeline(image_path)
        readable, query = format_recipe(extracted)

        sim_list = ["No query generated.", "", ""]
        if query:
            sim_list = find_similar_recipes_list(query)

        first_card, second_card, third_card = sim_list[0], sim_list[1], sim_list[2]
        return (
            readable,
            first_card,
            gr.update(visible=True),
            gr.update(visible=True),
            second_card,
            gr.update(visible=True),
            third_card,
        )
    except Exception as e:
        return f"Error: {e}", "Error", gr.update(), gr.update(), "", gr.update(), ""
# ==========================================
# 4. MODERN UI THEME & CSS
# ==========================================
# Soft theme with an indigo/blue/slate palette; 'Inter' is requested as a
# Google font with generic sans-serif fallbacks after it.
theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui'],
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)
# Custom stylesheet for the app, kept as one string and handed to Gradio at
# launch. The class names and the #feed-container id defined here are the ones
# referenced by the `elem_classes` / `elem_id` arguments and the inline HTML in
# the layout section. The text below is runtime data: edit it as CSS, not as
# Python.
modern_css = """
body, .gradio-container { background-color: #f0f2f5; }
/* Sticky Header */
.custom-header {
background: rgba(255, 255, 255, 0.95);
backdrop-filter: blur(10px);
border-bottom: 1px solid #e4e6eb;
padding: 15px 20px;
display: flex;
align-items: center;
justify-content: space-between;
position: sticky;
top: 0;
z-index: 1000;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}
.logo-area { display: flex; align-items: center; gap: 20px; }
.logo-img { height: 120px; width: 120px; border-radius: 12px; object-fit: cover; border: 1px solid #ddd; }
.text-area { display: flex; flex-direction: column; }
.app-name {
font-weight: 800;
font-size: 32px;
background: -webkit-linear-gradient(45deg, #1877f2, #6b21a8);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
line-height: 1.2;
}
.app-slogan { font-size: 16px; color: #65676b; font-weight: 500; }
/* Sidebar Navigation */
.nav-btn {
text-align: left !important;
justify-content: flex-start !important;
background: transparent !important;
border: none !important;
box-shadow: none !important;
color: #65676b !important;
font-weight: 600 !important;
font-size: 16px !important;
padding: 12px 16px !important;
border-radius: 10px !important;
transition: all 0.2s ease;
}
.nav-btn:hover { background-color: #e4e6eb !important; color: #050505 !important; }
.nav-btn.selected {
background-color: #e7f3ff !important;
color: #1877f2 !important;
border-left: 4px solid #1877f2 !important;
}
/* Feed Styling */
#feed-container {
gap: 0px !important;
padding: 0px !important;
}
#feed-container > .form {
gap: 0px !important;
}
.content-card {
background-color: #ffffff !important;
background: #ffffff !important;
border-radius: 12px;
box-shadow: 0 1px 2px rgba(0,0,0,0.1);
border: 1px solid #ddd;
padding: 20px;
margin-bottom: 7px !important;
margin-top: 0px !important;
width: 100%;
display: block;
}
/* Similar Recipe Cards */
.sim-card {
background: #fff;
border: 1px solid #eee;
border-radius: 8px;
padding: 15px;
height: 100%;
border-top: 4px solid #1877f2;
display: flex;
flex-direction: column;
justify-content: space-between;
}
.sim-scroll {
height: 400px;
overflow-y: auto;
margin-bottom: 10px;
padding-right: 5px;
font-size: 14px;
color: #4b4f56;
}
.trend-box {
background:white;
padding:10px;
border-radius:8px;
margin-bottom:10px;
box-shadow:0 1px 2px rgba(0,0,0,0.1);
transition: background 0.2s;
}
.trend-box:hover { background: #f0f2f5; cursor: pointer; }
/* Contact List Styling */
.contact-item {
display: flex;
align-items: center;
padding: 10px;
background: white;
border-radius: 8px;
margin-bottom: 8px;
box-shadow: 0 1px 2px rgba(0,0,0,0.1);
cursor: pointer;
transition: 0.2s;
}
.contact-item:hover { background: #f0f2f5; }
.avatar-circle {
width: 36px; height: 36px;
background: #e4e6eb;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-weight: 700;
color: #050505;
margin-right: 12px;
position: relative;
font-size: 13px;
border: 1px solid #ddd;
}
.status-badge {
width: 12px; height: 12px;
border-radius: 50%;
border: 2px solid white;
position: absolute;
bottom: -1px; right: -2px;
}
.status-green { background-color: #31a24c; }
.status-red { background-color: #f02849; }
.gap-fix { gap: 25px !important; }
.gradio-examples {
display: flex;
justify-content: center;
width: 100%;
}
/* EXAMPLE IMAGES STYLING UPDATED */
button.gallery-item {
transition: transform 0.2s ease, box-shadow 0.2s ease !important;
z-index: 1;
width: 80px !important; /* Force width */
height: 80px !important; /* Force height */
overflow: hidden !important;
}
button.gallery-item img {
width: 100% !important;
height: 100% !important;
object-fit: cover !important;
}
button.gallery-item:hover {
transform: scale(2.5) !important;
z-index: 1000 !important;
box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
border: 2px solid white !important;
border-radius: 8px !important;
}
"""
# ==========================================
# 5. LAYOUT CONSTRUCTION
# ==========================================
# Page layout: a sticky header, then one row with three columns:
#   left   - profile badge and the three navigation buttons
#   center - three stacked views (digitizer, feed, about); one is visible at a time
#   right  - static "trending" and "contacts" widgets
# The navigation callbacks at the bottom toggle view visibility and move the
# "selected" CSS class between the nav buttons.
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm container boundaries against the deployed layout.
with gr.Blocks(title="CookBook AI") as demo:
    # --- HEADER ---
    gr.HTML(f"""
<div class="custom-header">
<div class="logo-area">
<img src="data:image/jpeg;base64,{logo_b64}" class="logo-img">
<div class="text-area">
<span class="app-name">CookBook AI</span>
<span class="app-slogan">Turning Handwritten Recipes into a Digital Recipe.</span>
</div>
</div>
<div style="color: #65676b; font-weight: 600;">v4.4</div>
</div>
""")
    with gr.Row():
        # --- LEFT SIDEBAR ---
        with gr.Column(scale=1, min_width=200):
            gr.HTML(f"""
<div style="display:flex; align-items:center; padding: 10px 10px 5px 10px;">
<img src="data:image/jpeg;base64,{profile_b64}" style="width:40px; height:40px; border-radius:50%; margin-right:10px; object-fit:cover;">
<b style="font-size: 16px;">My Profile</b>
</div>
""")
            gr.HTML("<hr style='border: 0; border-top: 1px solid #e4e6eb; margin: 10px 0 20px 0;'>")
            nav_digital = gr.Button("โœจ AI Digitizer", elem_classes=["nav-btn", "selected"])
            nav_feed = gr.Button("๐Ÿ“ฐ News Feed", elem_classes=["nav-btn"])
            nav_about = gr.Button("โ„น๏ธ About", elem_classes=["nav-btn"])
        # --- CENTER CONTENT ---
        with gr.Column(scale=3):
            # === VIEW 1: AI DIGITALIZER ===
            with gr.Group(visible=True) as digitalizer_view:
                with gr.Row(elem_classes=["gap-fix"]):
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["content-card"]):
                            input_img = gr.Image(type="filepath", label="Upload", height=300)
                            magic_btn = gr.Button("โœจ Convert to Digital", variant="primary", size="lg")
                            # --- UPDATED EXAMPLES HERE ---
                            gr.Examples(
                                examples=[
                                    ["quick_tries_images/applecrisp.jpg"],
                                    ["quick_tries_images/meatballs recipe.jpg"],
                                    ["quick_tries_images/Apple Dapple (aka Fresh Apple Cake).jfif"]
                                ],
                                inputs=input_img,
                                label="Or try these examples:",
                                cache_examples=False
                            )
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["content-card"]):
                            out_text = gr.Textbox(label="Result", value="Here your digitalized recipe will be presented", lines=20, interactive=False, show_label=False)
                gr.HTML("<div style='height: 35px;'></div>")
                gr.Markdown("### 3. Similar Recipes from Database")
                with gr.Row():
                    # The first card is always visible (it shows the hint
                    # text); only its button row starts hidden. Cards two and
                    # three are hidden entirely until results arrive.
                    with gr.Column(elem_classes=["sim-card"]) as c1_box:
                        sim1 = gr.Markdown("Once you will upload your scanned recipe, we will share similar recipes!")
                        with gr.Row(visible=False) as c1_btns:
                            gr.Button("๐Ÿ‘ Like", size="sm", variant="secondary")
                            gr.Button("โ†—๏ธ Share", size="sm", variant="secondary")
                    with gr.Column(elem_classes=["sim-card"], visible=False) as c2_box:
                        sim2 = gr.Markdown("")
                        with gr.Row():
                            gr.Button("๐Ÿ‘ Like", size="sm", variant="secondary")
                            gr.Button("โ†—๏ธ Share", size="sm", variant="secondary")
                    with gr.Column(elem_classes=["sim-card"], visible=False) as c3_box:
                        sim3 = gr.Markdown("")
                        with gr.Row():
                            gr.Button("๐Ÿ‘ Like", size="sm", variant="secondary")
                            gr.Button("โ†—๏ธ Share", size="sm", variant="secondary")
                # Output order must match the 7-tuple returned by ui_update_pipeline.
                magic_btn.click(ui_update_pipeline, input_img, [out_text, sim1, c1_btns, c2_box, sim2, c3_box, sim3])
            # === VIEW 2: FEED ===
            with gr.Column(visible=False, elem_id="feed-container") as feed_view:
                if not df_recipes.empty:
                    # Cap the sample at the table size: DataFrame.sample raises
                    # ValueError when asked for more rows than exist (sampling
                    # is without replacement), which would abort app start-up
                    # for a dataset with fewer than 10 recipes.
                    feed_samples = df_recipes.sample(min(10, len(df_recipes)))
                    for _, row in feed_samples.iterrows():
                        # Author, avatar emoji and timestamp are decorative and
                        # picked at random for each post.
                        user_name = random.choice(["Grandma Rose", "Chef Mike", "Sarah J."])
                        emoji = random.choice(["๐Ÿฅ˜", "๐Ÿฅ—", "๐Ÿฐ", "๐ŸŒฎ"])
                        time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
                        post_time = random.choice(time_options)
                        raw_desc = str(row.get('Raw_Output', 'Delicious recipe...'))[:250]
                        title_feed = row.get('Title', 'Recipe')
                        with gr.Group(elem_classes=["content-card"]):
                            gr.HTML(f"""
<div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;">
<div style="width:40px; height:40px; background:#e4e6eb; border-radius:50%; display:flex; align-items:center; justify-content:center; font-size:20px;">{emoji}</div>
<div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} ยท ๐ŸŒ Public</span></div>
</div>
""")
                            gr.Markdown(f"### {title_feed}")
                            gr.Markdown(f"{raw_desc}...")
                            with gr.Row():
                                gr.Button("๐Ÿ‘ Like", size="sm", variant="secondary")
                                gr.Button("๐Ÿ’ฌ Comment", size="sm", variant="secondary")
                                gr.Button("โ†—๏ธ Share", size="sm", variant="secondary")
                else:
                    gr.Markdown("โš ๏ธ Database is empty.")
            # === VIEW 3: ABOUT (UPDATED) ===
            with gr.Group(visible=False) as about_view:
                with gr.Group(elem_classes=["content-card"]):
                    gr.Markdown("""
# Goal Project
The goal of this project is to develop an app that takes a scanned image of a handwritten recipe as input, generates text using a VLM, and based on the extracted text, suggests 3 similar recipes from a 10K dataset of synthetic recipes. Our app will bridge the gap between analog culinary heritage and digital discovery.
### About Us
This app was developed by **Shahar Firshtman** and **Lior Feinstein**, 2nd year students for Economics and data science.
""")
                    # Process Image
                    gr.HTML(f"""
<div style="margin-top: 20px;">
<h3 style="color: #444;">Process Overview</h3>
<img src="data:image/jpeg;base64,{process_b64}" style="width: 100%; height: auto; border-radius: 8px; border: 1px solid #ddd;">
</div>
""")
        # --- RIGHT COLUMN ---
        with gr.Column(scale=1, min_width=200):
            # 1. Trending
            gr.Markdown("### Trending Recipes")

            def trend_box(title, likes):
                """Return the HTML for one trending-recipe tile."""
                return f"<div class='trend-box'><b>{title}</b><br><span style='color:gray; font-size:12px;'>{likes} likes</span></div>"

            gr.HTML(trend_box("๐Ÿœ Ramen Hack", "12k") + trend_box("๐Ÿช Best Cookies", "8k") + trend_box("๐Ÿฐ Cheese Cake", "15k") + trend_box("๐Ÿช Nana's Tahini Cookies", "9k"))
            # 2. Contacts
            gr.HTML("<div style='height: 20px;'></div>")
            gr.Markdown("### Quick Contacts")

            def contact_box(name, initials, status_color):
                """Return the HTML for one contact row.

                *status_color* selects the `status-<color>` CSS class of the badge.
                """
                return f"""
<div class='contact-item'>
<div class='avatar-circle'>
{initials}
<div class='status-badge status-{status_color}'></div>
</div>
<div style='font-weight:600; font-size:14px; color:#050505;'>{name}</div>
</div>
"""

            contact_html = (
                contact_box("Elon Musk", "EM", "green") +
                contact_box("Gordon Ramsay", "GR", "red") +
                contact_box("Guy Fieri", "GF", "green") +
                contact_box("Bobby Flay", "BF", "red")
            )
            gr.HTML(contact_html)
    # ==========================================
    # 6. NAVIGATION LOGIC (Python callbacks)
    # ==========================================
    # Each callback returns six updates in the order of `outputs_ui`:
    # visibility for the three views, then CSS classes for the three buttons.
    def go_digi():
        """Show the digitizer view and mark its nav button as selected."""
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(elem_classes=["nav-btn", "selected"]),
            gr.update(elem_classes=["nav-btn"]),
            gr.update(elem_classes=["nav-btn"]),
        )

    def go_feed():
        """Show the feed view and mark its nav button as selected."""
        return (
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(elem_classes=["nav-btn"]),
            gr.update(elem_classes=["nav-btn", "selected"]),
            gr.update(elem_classes=["nav-btn"]),
        )

    def go_about():
        """Show the about view and mark its nav button as selected."""
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(elem_classes=["nav-btn"]),
            gr.update(elem_classes=["nav-btn"]),
            gr.update(elem_classes=["nav-btn", "selected"]),
        )

    outputs_ui = [digitalizer_view, feed_view, about_view, nav_digital, nav_feed, nav_about]
    nav_digital.click(go_digi, None, outputs_ui)
    nav_feed.click(go_feed, None, outputs_ui)
    nav_about.click(go_about, None, outputs_ui)
if __name__ == "__main__":
    # Start the web app only when this file is executed directly; the theme
    # and stylesheet defined above are supplied at launch.
    demo.launch(css=modern_css, theme=theme)