import base64
import os
import pickle
import random

import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset
from huggingface_hub import InferenceClient
from sklearn.metrics.pairwise import cosine_similarity

from IO_pipeline import RecipeDigitalizerPipeline

# ==========================================
# 1. SETUP & DATA LOADING (HYBRID)
# ==========================================
# Module-level state produced by this section:
#   hf_token          - value of the HF_TOKEN environment variable (or None)
#   client            - InferenceClient, or None when no token is configured
#   df_recipes        - recipe table (empty two-column frame on download failure)
#   stored_embeddings - embedding matrix, or None when it could not be loaded
hf_token = os.getenv("HF_TOKEN")
API_MODEL = "BAAI/bge-small-en-v1.5"
client = InferenceClient(token=hf_token) if hf_token else None

print("⏳ Initializing Data Loading...")

# --- A. Load Text Data from Hugging Face Dataset ---
try:
    print(" ...Downloading recipes from HF Dataset (Liori25/10k_recipes)")
    dataset = load_dataset("Liori25/10k_recipes", split="train")
    df_recipes = dataset.to_pandas()
    print(f"✅ Recipes Loaded! Count: {len(df_recipes)}")
except Exception as e:
    # Best-effort start-up: keep the app alive with an empty table.
    print(f"❌ Error loading HF Dataset: {e}")
    df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})

# --- B. Load Embeddings from Local File (Space Repo) ---
try:
    print(" ...Loading embeddings from local 'recipe_embeddings.pkl'")
    if os.path.exists('recipe_embeddings.pkl'):
        # NOTE(review): pickle.load can execute arbitrary code from the file.
        # This is only acceptable if the pickle is a trusted artifact - confirm.
        with open('recipe_embeddings.pkl', 'rb') as f:
            data = pickle.load(f)

        # Logic to handle different pickle formats
        if isinstance(data, dict):
            stored_embeddings = np.array(data['embeddings'])
        elif isinstance(data, pd.DataFrame):
            target_col = next(
                (c for c in ['embedding', 'embeddings', 'vectors'] if c in data.columns),
                None,
            )
            # Without a recognised column the frame itself is used as the matrix.
            stored_embeddings = np.vstack(data[target_col].values) if target_col else data
        elif hasattr(data, "shape"):
            # Already array-like (e.g. an ndarray): use it untouched.
            stored_embeddings = data
        else:
            # Plain sequences (e.g. a list of vectors) have no `.shape`; without
            # this conversion the report below raised AttributeError and the
            # successfully loaded embeddings were thrown away.
            stored_embeddings = np.asarray(data)
            if stored_embeddings.ndim == 0:
                # Not a sequence at all; handled by the except below.
                raise TypeError(
                    f"unsupported embeddings payload: {type(data).__name__}"
                )

        print(f"✅ Embeddings Loaded! Shape: {stored_embeddings.shape}")
    else:
        print("❌ 'recipe_embeddings.pkl' not found locally.")
        stored_embeddings = None
except Exception as e:
    print(f"❌ Error loading pickle file: {e}")
    stored_embeddings = None

# --- C.
# Safety Check ---
# Rows of the embedding matrix are matched to recipes by position, so a length
# mismatch means some similarity hits may point at the wrong (or no) recipe.
if stored_embeddings is not None and not df_recipes.empty:
    if len(stored_embeddings) != len(df_recipes):
        print(f"⚠️ WARNING: Row mismatch! Recipes: {len(df_recipes)}, Embeddings: {len(stored_embeddings)}")


# ==========================================
# 2. HELPER: IMAGE TO BASE64
# ==========================================
def image_to_base64(image_path):
    """Return the file at *image_path* encoded as a base64 text string.

    When the path does not exist a fixed placeholder string is returned
    instead of raising (presumably a tiny blank GIF - TODO confirm).
    """
    if not os.path.exists(image_path):
        return "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
    with open(image_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode('utf-8')


logo_b64 = image_to_base64("cookbook logo.png")
profile_b64 = image_to_base64("chef.avif")
process_b64 = image_to_base64("preview of process.jpg")


# ==========================================
# 3. BACKEND LOGIC
# ==========================================
def get_embedding_via_api(text):
    """Embed *text* with the hosted ``API_MODEL`` and return a NumPy array.

    Raises:
        ValueError: if no inference client exists (HF_TOKEN was not set).
    """
    if not client:
        raise ValueError("HF_TOKEN missing")
    response = client.feature_extraction(text, model=API_MODEL)
    return np.array(response)


def _clean_recipe_field(raw_text):
    """Normalise one recipe cell for display; return None to hide it.

    A cell is hidden when it is empty / a null marker, or when it contains
    the word "error" (which also covers "parse error").
    """
    val = str(raw_text).strip()

    # 1. Clean list syntax (['...'])
    if val.startswith("[") and val.endswith("]"):
        val = val[1:-1].replace("'", "").replace('"', "").strip()

    val_lower = val.lower()

    # 2. Basic Empty Checks
    if val_lower in ('nan', 'none', 'null', '[]', ''):
        return None

    # 3. STRICT ERROR CHECK: any "error" text hides the block
    if "error" in val_lower:
        return None

    return val


def find_similar_recipes_list(query_text):
    """Return three Markdown cards for the recipes most similar to *query_text*.

    The query is embedded through the inference API and compared to
    ``stored_embeddings`` with cosine similarity; the best matches are looked
    up by row position in ``df_recipes``.  The result always has exactly three
    entries: cards first, padded with empty strings, or three copies of an
    error message when the embeddings or the recipe table are unavailable.
    """
    if stored_embeddings is None:
        return ["Database error: Embeddings missing."] * 3
    if df_recipes.empty:
        return ["Database error: Recipes missing."] * 3

    query_vec = get_embedding_via_api(
        "Represent this recipe for retrieving similar dishes: " + query_text
    )
    if query_vec.ndim == 1:
        query_vec = query_vec.reshape(1, -1)

    # Calculate Similarity
    scores = cosine_similarity(query_vec, stored_embeddings)[0]
    # If there are more embeddings than recipes (the start-up check only warns
    # about this), an index past the end of df_recipes would make `iloc` raise.
    # Only rows that have a recipe are eligible.
    scores = scores[:len(df_recipes)]
    top_indices = scores.argsort()[-3:][::-1]

    # Identify column names
    cols = df_recipes.columns
    ing_col = next((c for c in cols if 'ingredient' in c.lower()), None)
    inst_col = next((c for c in cols if 'instruction' in c.lower()), None)

    results_list = []
    for idx in top_indices:
        score = scores[idx]
        row = df_recipes.iloc[idx]
        title = row.get('Title', 'Unknown Recipe')
        score_display = f"{score:.3%}"

        content_parts = []

        # 1. Check Ingredients - only shown when valid and error-free
        if ing_col:
            cleaned_ing = _clean_recipe_field(row[ing_col])
            if cleaned_ing:
                content_parts.append(f"🛒 INGREDIENTS:\n{cleaned_ing}")

        # 2. Check Instructions - only shown when valid and error-free
        if inst_col:
            cleaned_inst = _clean_recipe_field(row[inst_col])
            if cleaned_inst:
                content_parts.append(f"🍳 INSTRUCTIONS:\n{cleaned_inst}")

        # 3. Fallback logic: nothing displayable, so try the raw output,
        #    which is hidden too if it mentions an error.
        if not content_parts:
            raw_out = str(row.get('Raw_Output', 'No details available.'))
            if "error" in raw_out.lower():
                display_text = "Details unavailable for this recipe."
            else:
                display_text = raw_out
        else:
            display_text = "\n\n".join(content_parts)

        card_content = (
            f"### 🏆 {title}\n"
            f"Match Score: {score_display}\n\n"
            f"\n{display_text}\n"
        )
        results_list.append(card_content)

    # The UI has three fixed slots.
    while len(results_list) < 3:
        results_list.append("")
    return results_list


def format_recipe(json_data):
    """Turn the digitizer's result into ``(display_text, query_text)``.

    ``query_text`` is the empty string when no recipe could be produced; the
    caller uses that to skip the similarity search.
    """
    # Anything without dict-style access (None, str, list, ...) previously
    # crashed with a TypeError/AttributeError; report it as an error instead.
    if not hasattr(json_data, "get"):
        return f"Error: unexpected pipeline output ({type(json_data).__name__})", ""
    if "error" in json_data:
        return f"Error: {json_data['error']}", ""

    title = json_data.get("title", "Unknown")
    # `or []` also covers keys that are present but None.
    ingredients = json_data.get("ingredients") or []
    instructions = json_data.get("instructions") or []

    ing = "\n".join(f"- {x}" for x in ingredients)
    inst = "\n".join(f"{i+1}. {x}" for i, x in enumerate(instructions))
    text = f"🍽️ {title}\n\n🛒 INGREDIENTS:\n{ing}\n\n🍳 INSTRUCTIONS:\n{inst}"
    return text, f"{title} {ing} {inst}"


def ui_update_pipeline(image_path):
    """Gradio callback: digitize the uploaded image and fill the result widgets.

    Returns a 7-tuple: recipe text, first card, an update for the first card's
    button row, an update for the second card box, second card, an update for
    the third card box, third card.  On failure the updates are no-ops
    (``gr.update()``) and the text slots carry the error.
    """
    if not hf_token:
        return "Error: HF_TOKEN missing", "", gr.update(), gr.update(), "", gr.update(), ""
    try:
        # Re-export the token; presumably read by the pipeline - TODO confirm.
        os.environ["HF_TOKEN"] = hf_token
        digitizer = RecipeDigitalizerPipeline()
        json_res = digitizer.run_pipeline(image_path)
        readable, query = format_recipe(json_res)
        if query:
            sim_list = find_similar_recipes_list(query)
        else:
            sim_list = ["No query generated.", "", ""]
        return (readable,
                sim_list[0], gr.update(visible=True),
                gr.update(visible=True), sim_list[1],
                gr.update(visible=True), sim_list[2])
    except Exception as e:
        # Top-level UI boundary: surface the failure in the result box.
        return f"Error: {e}", "Error", gr.update(), gr.update(), "", gr.update(), ""


# ==========================================
# 4.
# MODERN UI THEME & CSS
# ==========================================
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui']
)

# Stylesheet for the app. Written as adjacent literals (one rule per group);
# the concatenated value is the same text as the original single literal.
modern_css = (
    " body, .gradio-container { background-color: #f0f2f5; }"
    " /* Sticky Header */"
    " .custom-header { background: rgba(255, 255, 255, 0.95); backdrop-filter: blur(10px);"
    " border-bottom: 1px solid #e4e6eb; padding: 15px 20px; display: flex; align-items: center;"
    " justify-content: space-between; position: sticky; top: 0; z-index: 1000;"
    " box-shadow: 0 2px 10px rgba(0,0,0,0.05); }"
    " .logo-area { display: flex; align-items: center; gap: 20px; }"
    " .logo-img { height: 120px; width: 120px; border-radius: 12px; object-fit: cover;"
    " border: 1px solid #ddd; }"
    " .text-area { display: flex; flex-direction: column; }"
    " .app-name { font-weight: 800; font-size: 32px;"
    " background: -webkit-linear-gradient(45deg, #1877f2, #6b21a8);"
    " -webkit-background-clip: text; -webkit-text-fill-color: transparent; line-height: 1.2; }"
    " .app-slogan { font-size: 16px; color: #65676b; font-weight: 500; }"
    " /* Sidebar Navigation */"
    " .nav-btn { text-align: left !important; justify-content: flex-start !important;"
    " background: transparent !important; border: none !important; box-shadow: none !important;"
    " color: #65676b !important; font-weight: 600 !important; font-size: 16px !important;"
    " padding: 12px 16px !important; border-radius: 10px !important; transition: all 0.2s ease; }"
    " .nav-btn:hover { background-color: #e4e6eb !important; color: #050505 !important; }"
    " .nav-btn.selected { background-color: #e7f3ff !important; color: #1877f2 !important;"
    " border-left: 4px solid #1877f2 !important; }"
    " /* Feed Styling */"
    " #feed-container { gap: 0px !important; padding: 0px !important; }"
    " #feed-container > .form { gap: 0px !important; }"
    " .content-card { background-color: #ffffff !important; background: #ffffff !important;"
    " border-radius: 12px; box-shadow: 0 1px 2px rgba(0,0,0,0.1); border: 1px solid #ddd;"
    " padding: 20px; \n"
    "margin-bottom: 7px !important; margin-top: 0px !important; width: 100%; display: block; }"
    " /* Similar Recipe Cards */"
    " .sim-card { background: #fff; border: 1px solid #eee; border-radius: 8px; padding: 15px;"
    " height: 100%; border-top: 4px solid #1877f2; display: flex; flex-direction: column;"
    " justify-content: space-between; }"
    " .sim-scroll { height: 400px; overflow-y: auto; margin-bottom: 10px; padding-right: 5px;"
    " font-size: 14px; color: #4b4f56; }"
    " .trend-box { background:white; padding:10px; border-radius:8px; margin-bottom:10px;"
    " box-shadow:0 1px 2px rgba(0,0,0,0.1); transition: background 0.2s; }"
    " .trend-box:hover { background: #f0f2f5; cursor: pointer; }"
    " /* Contact List Styling */"
    " .contact-item { display: flex; align-items: center; padding: 10px; background: white;"
    " border-radius: 8px; margin-bottom: 8px; box-shadow: 0 1px 2px rgba(0,0,0,0.1);"
    " cursor: pointer; transition: 0.2s; }"
    " .contact-item:hover { background: #f0f2f5; }"
    " .avatar-circle { width: 36px; height: 36px; background: #e4e6eb; border-radius: 50%;"
    " display: flex; align-items: center; justify-content: center; font-weight: 700;"
    " color: #050505; margin-right: 12px; position: relative; font-size: 13px;"
    " border: 1px solid #ddd; }"
    " .status-badge { width: 12px; height: 12px; border-radius: 50%; border: 2px solid white;"
    " position: absolute; bottom: -1px; right: -2px; }"
    " .status-green { background-color: #31a24c; }"
    " .status-red { background-color: #f02849; }"
    " .gap-fix { gap: 25px !important; }"
    " .gradio-examples { display: flex; justify-content: center; width: 100%; }"
    " /* EXAMPLE IMAGES STYLING UPDATED */"
    " button.gallery-item { transition: transform 0.2s ease, box-shadow 0.2s ease !important;"
    " z-index: 1; width: 80px !important; /* Force width */ height: 80px !important;"
    " /* Force height */ overflow: hidden !important; }"
    " button.gallery-item img { width: 100% !important; height: 100% !important;"
    " object-fit: cover !important; }"
    " button.gallery-item:hover { transform: scale(2.5) !important; z-index: 1000 !important;"
    " box-shadow: 0 10px \n"
    "25px rgba(0,0,0,0.3) !important; border: 2px solid white !important;"
    " border-radius: 8px !important; }"
    " "
)

# ==========================================
# 5. LAYOUT CONSTRUCTION
# ==========================================
# NOTE(review): the source reached review with its indentation flattened; the
# widget nesting below is reconstructed from statement order - confirm against
# the running app.  Components are created in the original order.
with gr.Blocks(title="CookBook AI") as demo:

    # --- HEADER ---
    gr.HTML(f"""
CookBook AI Turning Handwritten Recipes into a Digital Recipe.
v4.4
""")

    with gr.Row():

        # --- LEFT SIDEBAR ---
        with gr.Column(scale=1, min_width=200):
            gr.HTML(f"""
My Profile
""")
            gr.HTML("\n")
            nav_digital = gr.Button("✨ AI Digitizer", elem_classes=["nav-btn", "selected"])
            nav_feed = gr.Button("📰 News Feed", elem_classes=["nav-btn"])
            nav_about = gr.Button("ℹ️ About", elem_classes=["nav-btn"])

        # --- CENTER CONTENT ---
        with gr.Column(scale=3):

            # === VIEW 1: AI DIGITALIZER ===
            with gr.Group(visible=True) as digitalizer_view:
                with gr.Row(elem_classes=["gap-fix"]):
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["content-card"]):
                            input_img = gr.Image(type="filepath", label="Upload", height=300)
                            magic_btn = gr.Button("✨ Convert to Digital", variant="primary", size="lg")
                            # --- UPDATED EXAMPLES HERE ---
                            gr.Examples(
                                examples=[
                                    ["quick_tries_images/applecrisp.jpg"],
                                    ["quick_tries_images/meatballs recipe.jpg"],
                                    ["quick_tries_images/Apple Dapple (aka Fresh Apple Cake).jfif"]
                                ],
                                inputs=input_img,
                                label="Or try these examples:",
                                cache_examples=False
                            )
                    with gr.Column(scale=1):
                        with gr.Group(elem_classes=["content-card"]):
                            out_text = gr.Textbox(
                                label="Result",
                                value="Here your digitalized recipe will be presented",
                                lines=20,
                                interactive=False,
                                show_label=False
                            )

                gr.HTML("\n")
                gr.Markdown("### 3. Similar Recipes from Database")
                with gr.Row():
                    # Card 1 is visible from the start (it carries the hint
                    # text); only its button row is hidden until results exist.
                    with gr.Column(elem_classes=["sim-card"]) as c1_box:
                        sim1 = gr.Markdown("Once you will upload your scanned recipe, we will share similar recipes!")
                        with gr.Row(visible=False) as c1_btns:
                            gr.Button("👍 Like", size="sm", variant="secondary")
                            gr.Button("↗️ Share", size="sm", variant="secondary")
                    # Cards 2 and 3 stay hidden until the pipeline reveals them.
                    with gr.Column(elem_classes=["sim-card"], visible=False) as c2_box:
                        sim2 = gr.Markdown("")
                        with gr.Row():
                            gr.Button("👍 Like", size="sm", variant="secondary")
                            gr.Button("↗️ Share", size="sm", variant="secondary")
                    with gr.Column(elem_classes=["sim-card"], visible=False) as c3_box:
                        sim3 = gr.Markdown("")
                        with gr.Row():
                            gr.Button("👍 Like", size="sm", variant="secondary")
                            gr.Button("↗️ Share", size="sm", variant="secondary")

                # Output order must match the tuple returned by ui_update_pipeline.
                magic_btn.click(
                    ui_update_pipeline,
                    input_img,
                    [out_text, sim1, c1_btns, c2_box, sim2, c3_box, sim3]
                )

            # === VIEW 2: FEED ===
            with gr.Column(visible=False, elem_id="feed-container") as feed_view:
                if not df_recipes.empty:
                    # sample() raises ValueError when asked for more rows than
                    # exist, so cap the request for datasets under 10 recipes.
                    feed_samples = df_recipes.sample(min(10, len(df_recipes)))
                    for _, row in feed_samples.iterrows():
                        # Author, emoji and timestamp are random decoration.
                        user_name = random.choice(["Grandma Rose", "Chef Mike", "Sarah J."])
                        emoji = random.choice(["🥘", "🥗", "🍰", "🌮"])
                        time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"]
                        post_time = random.choice(time_options)
                        raw_desc = str(row.get('Raw_Output', 'Delicious recipe...'))[:250]
                        title_feed = row.get('Title', 'Recipe')
                        with gr.Group(elem_classes=["content-card"]):
                            gr.HTML(f"""
{emoji}
{user_name}
{post_time} · 🌍 Public
""")
                            gr.Markdown(f"### {title_feed}")
                            gr.Markdown(f"{raw_desc}...")
                            with gr.Row():
                                gr.Button("👍 Like", size="sm", variant="secondary")
                                gr.Button("💬 Comment", size="sm", variant="secondary")
                                gr.Button("↗️ Share", size="sm", variant="secondary")
                else:
                    gr.Markdown("⚠️ Database is empty.")

            # === VIEW 3: ABOUT (UPDATED) ===
            with gr.Group(visible=False) as about_view:
                with gr.Group(elem_classes=["content-card"]):
                    gr.Markdown(
                        " # Goal Project The goal of this project is to develop an app that takes a"
                        " scanned image of a handwritten recipe as input, generates text using a VLM,"
                        " and based on the extracted text, suggests 3 similar recipes from a 10K"
                        " dataset of synthetic recipes. Our app will bridge the gap between analog"
                        " culinary heritage and digital discovery. ### About Us This app was developed"
                        " by **Shahar Firshtman** and **Lior Feinstein**, 2nd year students for"
                        " Economics and data science. "
                    )
                    # Process Image
                    gr.HTML(f"""

Process Overview

""")

        # --- RIGHT COLUMN ---
        with gr.Column(scale=1, min_width=200):
            # 1. Trending
            gr.Markdown("### Trending Recipes")

            def trend_box(title, likes):
                """Return the snippet for one trending entry (title and like count)."""
                return f"\n{title}\n{likes} likes\n"

            gr.HTML(
                trend_box("🍜 Ramen Hack", "12k")
                + trend_box("🍪 Best Cookies", "8k")
                + trend_box("🍰 Cheese Cake", "15k")
                + trend_box("🍪 Nana's Tahini Cookies", "9k")
            )

            # 2. Contacts
            gr.HTML("\n")
            gr.Markdown("### Quick Contacts")

            def contact_box(name, initials, status_color):
                """Return the snippet for one contact entry.

                ``status_color`` is accepted but does not appear in the
                returned text as the code stands.
                """
                return f"""
{initials}
{name}
"""

            contact_html = (
                contact_box("Elon Musk", "EM", "green")
                + contact_box("Gordon Ramsay", "GR", "red")
                + contact_box("Guy Fieri", "GF", "green")
                + contact_box("Bobby Flay", "BF", "red")
            )
            gr.HTML(contact_html)

    # ==========================================
    # 6. NAVIGATION LOGIC
    # ==========================================
    # Each callback returns six updates matching `outputs_ui`: visibility for
    # the three views, then CSS classes for the three sidebar buttons so that
    # only the active one carries "selected".
    def go_digi():
        """Show the digitizer view and mark its sidebar button as selected."""
        return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
                gr.update(elem_classes=["nav-btn", "selected"]),
                gr.update(elem_classes=["nav-btn"]),
                gr.update(elem_classes=["nav-btn"]))

    def go_feed():
        """Show the feed view and mark its sidebar button as selected."""
        return (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False),
                gr.update(elem_classes=["nav-btn"]),
                gr.update(elem_classes=["nav-btn", "selected"]),
                gr.update(elem_classes=["nav-btn"]))

    def go_about():
        """Show the about view and mark its sidebar button as selected."""
        return (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
                gr.update(elem_classes=["nav-btn"]),
                gr.update(elem_classes=["nav-btn"]),
                gr.update(elem_classes=["nav-btn", "selected"]))

    outputs_ui = [digitalizer_view, feed_view, about_view, nav_digital, nav_feed, nav_about]
    nav_digital.click(go_digi, None, outputs_ui)
    nav_feed.click(go_feed, None, outputs_ui)
    nav_about.click(go_about, None, outputs_ui)

if __name__ == "__main__":
    # NOTE(review): assumes a Gradio release whose launch() accepts `theme`
    # and `css` - confirm against the pinned version.
    demo.launch(theme=theme, css=modern_css)