Spaces:

Liori25
/

CookBookAI

Sleeping

App Files Files Community

Liori25 committed on Jan 14

Commit

944703d

verified ·

1 Parent(s): d988bca

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -44

app.py CHANGED Viewed

@@ -1,27 +1,28 @@
 import gradio as gr
 import pandas as pd
 import pickle
-import torch
-from sentence_transformers import SentenceTransformer, util
-from IO_pipeline import RecipeDigitalizerPipeline  # Importing your image pipeline
 # ==========================================
-# 1. LOAD RESOURCES (Run once on startup)
 # ==========================================
-print("⏳ Loading Models and Datasets...")
-# A. Load the Sentence Transformer Model (for embedding the NEW recipe)
-model_name = 'BAAI/bge-small-en-v1.5'
-embedding_model = SentenceTransformer(model_name)
-# B. Load the Pre-computed Embeddings
 with open('recipe_embeddings.pkl', 'rb') as f:
     data_bundle = pickle.load(f)
-    # Extract the matrix of vectors (Assuming dict format from previous step)
-    # If you saved just the dataframe, adjust to: stored_embeddings = data_bundle['embedding'].tolist()
-    stored_embeddings = data_bundle['embeddings']
-# C. Load the CSV Dataset (For displaying recipe details)
 df_recipes = pd.read_csv('RecipeData_10K.csv')
 print("✅ Resources Loaded Successfully!")
@@ -30,12 +31,23 @@ print("✅ Resources Loaded Successfully!")
 # 2. CORE FUNCTIONS
 # ==========================================
 def format_recipe_text(json_data):
     """Converts the JSON output into a readable string."""
     if "error" in json_data:
         return f"Error: {json_data['error']}", ""
-    # Extract fields with safe fallbacks
     title = json_data.get("title", "Unknown Recipe")
     cuisine = json_data.get("cuisine_type", "General")
     difficulty = json_data.get("difficulty", "Medium")
@@ -43,7 +55,6 @@ def format_recipe_text(json_data):
     ingredients = "\n".join([f"- {item}" for item in json_data.get("ingredients", [])])
     instructions = "\n".join([f"{i+1}. {step}" for i, step in enumerate(json_data.get("instructions", []))])
-    # 1. Readable Text Block
     display_text = (
         f"🍽️ RECIPE: {title}\n"
         f"================================\n"
@@ -53,37 +64,40 @@ def format_recipe_text(json_data):
         f"🍳 INSTRUCTIONS:\n{instructions}"
     )
-    # 2. Search Query (Plain text for the AI model)
     search_query = f"{title} {cuisine} {ingredients} {instructions}"
     return display_text, search_query
 def find_similar_recipes(user_query_text):
-    """Embeds the user's recipe and finds the top 3 matches."""
-    # 1. Embed the new recipe text
-    # BGE model works best with instruction for queries
     instruction = "Represent this recipe for retrieving similar dishes: "
-    query_embedding = embedding_model.encode(instruction + user_query_text, convert_to_tensor=True)
-    # 2. Compute Cosine Similarity
-    # stored_embeddings must be converted to tensor if it isn't already
-    corpus_embeddings = torch.tensor(stored_embeddings)
-    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
-    # 3. Get Top 3 Results
-    top_results = torch.topk(cos_scores, k=3)
     recommendations = ""
-    for score, idx in zip(top_results.values, top_results.indices):
-        idx = int(idx)
         row = df_recipes.iloc[idx]
         rec_title = row['Title']
-        # Try to get raw output or construct a summary
-        rec_desc = row['Raw_Output'] if 'Raw_Output' in row else "No description available."
-        # Truncate description for display
         rec_desc_short = rec_desc[:200] + "..." if len(rec_desc) > 200 else rec_desc
         recommendations += (
@@ -96,16 +110,16 @@ def find_similar_recipes(user_query_text):
     return recommendations
 def magic_pipeline(image_path):
-    # Step 1: Image -> Text (Using your imported IO_pipeline)
     digitizer = RecipeDigitalizerPipeline()
     json_result = digitizer.run_pipeline(image_path)
-    # Step 2: Format Text for User
     readable_text, query_text = format_recipe_text(json_result)
-    # Step 3: Find Similar Recipes (only if we have valid text)
     if not query_text:
-        return readable_text, "Could not search for similar recipes due to extraction error."
     similar_recipes_text = find_similar_recipes(query_text)
@@ -113,7 +127,7 @@ def magic_pipeline(image_path):
 # ==========================================
-# 3. GRADIO UI LAYOUT
 # ==========================================
 custom_css = """
@@ -122,8 +136,8 @@ custom_css = """
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 👵 Legacy Kitchen")
-    gr.Markdown("Upload a photo of your handwritten family recipe. We will digitize it and find similar recipes from our database!")
     with gr.Row():
         with gr.Column():
@@ -131,19 +145,14 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
             submit_btn = gr.Button("✨ Digitize & Find Similar", variant="primary")
         with gr.Column():
-            # Section 3 & 4: Output Text (Digitized)
             output_digitized = gr.Textbox(label="📖 Digitized Recipe", lines=10)
-            # Section 5: Similar Recipes Output
             output_recommendations = gr.Textbox(label="🥗 3 Similar Recipes Found", lines=10)
-    # Click Event
     submit_btn.click(
         fn=magic_pipeline,
         inputs=input_image,
         outputs=[output_digitized, output_recommendations]
     )
-# Launch App
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import pandas as pd
 import pickle
+import numpy as np
+import os
+from huggingface_hub import InferenceClient
+from sklearn.metrics.pairwise import cosine_similarity
+from recipe_pipeline import RecipeDigitalizerPipeline # Ensure your pipeline file is named recipe_pipeline.py
 # ==========================================
+# 1. SETUP API CLIENT
 # ==========================================
+# We use the API for the embedding model too! No local heavy models.
+API_MODEL = "BAAI/bge-small-en-v1.5"
+client = InferenceClient(token=os.getenv("HF_TOKEN"))
+print("⏳ Loading Datasets...")
+# Load the Pre-computed Embeddings
+# We use standard pickle loading. Since we saved numpy arrays, we don't need torch.
 with open('recipe_embeddings.pkl', 'rb') as f:
     data_bundle = pickle.load(f)
+    stored_embeddings = data_bundle['embeddings'] # This is a numpy matrix
+# Load the CSV Dataset
 df_recipes = pd.read_csv('RecipeData_10K.csv')
 print("✅ Resources Loaded Successfully!")
 # 2. CORE FUNCTIONS
 # ==========================================
+def get_embedding_via_api(text):
+    """Sends text to HF API and gets back the vector."""
+    try:
+        # We use the feature_extraction task
+        response = client.feature_extraction(text, model=API_MODEL)
+        # The API returns a list of floats (or a list of lists); we convert it to a NumPy array.
+        # Note: BGE-Small is 384 dimensions.
+        return np.array(response)
+    except Exception as e:
+        print(f"API Error: {e}")
+        return None
 def format_recipe_text(json_data):
     """Converts the JSON output into a readable string."""
     if "error" in json_data:
         return f"Error: {json_data['error']}", ""
     title = json_data.get("title", "Unknown Recipe")
     cuisine = json_data.get("cuisine_type", "General")
     difficulty = json_data.get("difficulty", "Medium")
     ingredients = "\n".join([f"- {item}" for item in json_data.get("ingredients", [])])
     instructions = "\n".join([f"{i+1}. {step}" for i, step in enumerate(json_data.get("instructions", []))])
     display_text = (
         f"🍽️ RECIPE: {title}\n"
         f"================================\n"
         f"🍳 INSTRUCTIONS:\n{instructions}"
     )
+    # Text for the AI to search with
     search_query = f"{title} {cuisine} {ingredients} {instructions}"
     return display_text, search_query
 def find_similar_recipes(user_query_text):
+    """Finds recipes using API embeddings + Scikit-Learn (No Torch)."""
+    # 1. Get Embedding from API
     instruction = "Represent this recipe for retrieving similar dishes: "
+    query_vec = get_embedding_via_api(instruction + user_query_text)
+    if query_vec is None:
+        return "❌ Error: Could not reach Hugging Face API for embeddings."
+    # Ensure query_vec is 2D for scikit-learn (1, 384)
+    if len(query_vec.shape) == 1:
+        query_vec = query_vec.reshape(1, -1)
+    # 2. Calculate Cosine Similarity (using Numpy/Scikit, very fast)
+    # stored_embeddings is (10000, 384)
+    scores = cosine_similarity(query_vec, stored_embeddings)[0]
+    # 3. Get Top 3 Indices using Numpy
+    # argsort gives lowest first, so we take last 3 and reverse
+    top_indices = scores.argsort()[-3:][::-1]
     recommendations = ""
+    for idx in top_indices:
+        score = scores[idx]
         row = df_recipes.iloc[idx]
         rec_title = row['Title']
+        rec_desc = str(row['Raw_Output']) # Safe conversion
         rec_desc_short = rec_desc[:200] + "..." if len(rec_desc) > 200 else rec_desc
         recommendations += (
     return recommendations
 def magic_pipeline(image_path):
+    # Step 1: Image -> Text (API)
     digitizer = RecipeDigitalizerPipeline()
     json_result = digitizer.run_pipeline(image_path)
+    # Step 2: Format
     readable_text, query_text = format_recipe_text(json_result)
+    # Step 3: Text -> Similarity (API)
     if not query_text:
+        return readable_text, "Could not search."
     similar_recipes_text = find_similar_recipes(query_text)
 # ==========================================
+# 3. GRADIO UI
 # ==========================================
 custom_css = """
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 👵 Legacy Kitchen (Cloud API Edition)")
+    gr.Markdown("Upload a handwritten recipe. We digitize it and find matches using Hugging Face Serverless API.")
     with gr.Row():
         with gr.Column():
             submit_btn = gr.Button("✨ Digitize & Find Similar", variant="primary")
         with gr.Column():
             output_digitized = gr.Textbox(label="📖 Digitized Recipe", lines=10)
             output_recommendations = gr.Textbox(label="🥗 3 Similar Recipes Found", lines=10)
     submit_btn.click(
         fn=magic_pipeline,
         inputs=input_image,
         outputs=[output_digitized, output_recommendations]
     )
 if __name__ == "__main__":
     demo.launch()