Liori25 committed on
Commit
944703d
·
verified ·
1 Parent(s): d988bca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -44
app.py CHANGED
@@ -1,27 +1,28 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import pickle
4
- import torch
5
- from sentence_transformers import SentenceTransformer, util
6
- from IO_pipeline import RecipeDigitalizerPipeline # Importing your image pipeline
 
 
7
 
8
  # ==========================================
9
- # 1. LOAD RESOURCES (Run once on startup)
10
  # ==========================================
11
- print("⏳ Loading Models and Datasets...")
 
 
12
 
13
- # A. Load the Sentence Transformer Model (for embedding the NEW recipe)
14
- model_name = 'BAAI/bge-small-en-v1.5'
15
- embedding_model = SentenceTransformer(model_name)
16
 
17
- # B. Load the Pre-computed Embeddings
 
18
  with open('recipe_embeddings.pkl', 'rb') as f:
19
  data_bundle = pickle.load(f)
20
- # Extract the matrix of vectors (Assuming dict format from previous step)
21
- # If you saved just the dataframe, adjust to: stored_embeddings = data_bundle['embedding'].tolist()
22
- stored_embeddings = data_bundle['embeddings']
23
 
24
- # C. Load the CSV Dataset (For displaying recipe details)
25
  df_recipes = pd.read_csv('RecipeData_10K.csv')
26
  print("βœ… Resources Loaded Successfully!")
27
 
@@ -30,12 +31,23 @@ print("βœ… Resources Loaded Successfully!")
30
  # 2. CORE FUNCTIONS
31
  # ==========================================
32
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def format_recipe_text(json_data):
34
  """Converts the JSON output into a readable string."""
35
  if "error" in json_data:
36
  return f"Error: {json_data['error']}", ""
37
 
38
- # Extract fields with safe fallbacks
39
  title = json_data.get("title", "Unknown Recipe")
40
  cuisine = json_data.get("cuisine_type", "General")
41
  difficulty = json_data.get("difficulty", "Medium")
@@ -43,7 +55,6 @@ def format_recipe_text(json_data):
43
  ingredients = "\n".join([f"- {item}" for item in json_data.get("ingredients", [])])
44
  instructions = "\n".join([f"{i+1}. {step}" for i, step in enumerate(json_data.get("instructions", []))])
45
 
46
- # 1. Readable Text Block
47
  display_text = (
48
  f"🍽️ RECIPE: {title}\n"
49
  f"================================\n"
@@ -53,37 +64,40 @@ def format_recipe_text(json_data):
53
  f"🍳 INSTRUCTIONS:\n{instructions}"
54
  )
55
 
56
- # 2. Search Query (Plain text for the AI model)
57
  search_query = f"{title} {cuisine} {ingredients} {instructions}"
58
-
59
  return display_text, search_query
60
 
61
  def find_similar_recipes(user_query_text):
62
- """Embeds the user's recipe and finds the top 3 matches."""
63
 
64
- # 1. Embed the new recipe text
65
- # BGE model works best with instruction for queries
66
  instruction = "Represent this recipe for retrieving similar dishes: "
67
- query_embedding = embedding_model.encode(instruction + user_query_text, convert_to_tensor=True)
68
 
69
- # 2. Compute Cosine Similarity
70
- # stored_embeddings must be converted to tensor if it isn't already
71
- corpus_embeddings = torch.tensor(stored_embeddings)
72
- cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
 
 
 
 
 
 
73
 
74
- # 3. Get Top 3 Results
75
- top_results = torch.topk(cos_scores, k=3)
 
76
 
77
  recommendations = ""
78
 
79
- for score, idx in zip(top_results.values, top_results.indices):
80
- idx = int(idx)
81
  row = df_recipes.iloc[idx]
82
 
83
  rec_title = row['Title']
84
- # Try to get raw output or construct a summary
85
- rec_desc = row['Raw_Output'] if 'Raw_Output' in row else "No description available."
86
- # Truncate description for display
87
  rec_desc_short = rec_desc[:200] + "..." if len(rec_desc) > 200 else rec_desc
88
 
89
  recommendations += (
@@ -96,16 +110,16 @@ def find_similar_recipes(user_query_text):
96
  return recommendations
97
 
98
  def magic_pipeline(image_path):
99
- # Step 1: Image -> Text (Using your imported IO_pipeline)
100
  digitizer = RecipeDigitalizerPipeline()
101
  json_result = digitizer.run_pipeline(image_path)
102
 
103
- # Step 2: Format Text for User
104
  readable_text, query_text = format_recipe_text(json_result)
105
 
106
- # Step 3: Find Similar Recipes (only if we have valid text)
107
  if not query_text:
108
- return readable_text, "Could not search for similar recipes due to extraction error."
109
 
110
  similar_recipes_text = find_similar_recipes(query_text)
111
 
@@ -113,7 +127,7 @@ def magic_pipeline(image_path):
113
 
114
 
115
  # ==========================================
116
- # 3. GRADIO UI LAYOUT
117
  # ==========================================
118
 
119
  custom_css = """
@@ -122,8 +136,8 @@ custom_css = """
122
 
123
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
124
 
125
- gr.Markdown("# πŸ‘΅ Legacy Kitchen")
126
- gr.Markdown("Upload a photo of your handwritten family recipe. We will digitize it and find similar recipes from our database!")
127
 
128
  with gr.Row():
129
  with gr.Column():
@@ -131,19 +145,14 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
131
  submit_btn = gr.Button("✨ Digitize & Find Similar", variant="primary")
132
 
133
  with gr.Column():
134
- # Section 3 & 4: Output Text (Digitized)
135
  output_digitized = gr.Textbox(label="πŸ“– Digitized Recipe", lines=10)
136
-
137
- # Section 5: Similar Recipes Output
138
  output_recommendations = gr.Textbox(label="πŸ₯— 3 Similar Recipes Found", lines=10)
139
 
140
- # Click Event
141
  submit_btn.click(
142
  fn=magic_pipeline,
143
  inputs=input_image,
144
  outputs=[output_digitized, output_recommendations]
145
  )
146
 
147
- # Launch App
148
  if __name__ == "__main__":
149
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import pickle
4
+ import numpy as np
5
+ import os
6
+ from huggingface_hub import InferenceClient
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ from recipe_pipeline import RecipeDigitalizerPipeline # Ensure your pipeline file is named recipe_pipeline.py
9
 
10
  # ==========================================
11
+ # 1. SETUP API CLIENT
12
  # ==========================================
13
+ # We use the API for the embedding model too! No local heavy models.
14
+ API_MODEL = "BAAI/bge-small-en-v1.5"
15
+ client = InferenceClient(token=os.getenv("HF_TOKEN"))
16
 
17
+ print("⏳ Loading Datasets...")
 
 
18
 
19
+ # Load the Pre-computed Embeddings
20
+ # We use standard pickle loading. Since we saved numpy arrays, we don't need torch.
21
  with open('recipe_embeddings.pkl', 'rb') as f:
22
  data_bundle = pickle.load(f)
23
+ stored_embeddings = data_bundle['embeddings'] # This is a numpy matrix
 
 
24
 
25
+ # Load the CSV Dataset
26
  df_recipes = pd.read_csv('RecipeData_10K.csv')
27
  print("βœ… Resources Loaded Successfully!")
28
 
 
31
  # 2. CORE FUNCTIONS
32
  # ==========================================
33
 
34
+ def get_embedding_via_api(text):
35
+ """Sends text to HF API and gets back the vector."""
36
+ try:
37
+ # We use the feature_extraction task
38
+ response = client.feature_extraction(text, model=API_MODEL)
39
+ # The API returns a list of floats (or list of list). We convert to numpy.
40
+ # Note: BGE-Small is 384 dimensions.
41
+ return np.array(response)
42
+ except Exception as e:
43
+ print(f"API Error: {e}")
44
+ return None
45
+
46
  def format_recipe_text(json_data):
47
  """Converts the JSON output into a readable string."""
48
  if "error" in json_data:
49
  return f"Error: {json_data['error']}", ""
50
 
 
51
  title = json_data.get("title", "Unknown Recipe")
52
  cuisine = json_data.get("cuisine_type", "General")
53
  difficulty = json_data.get("difficulty", "Medium")
 
55
  ingredients = "\n".join([f"- {item}" for item in json_data.get("ingredients", [])])
56
  instructions = "\n".join([f"{i+1}. {step}" for i, step in enumerate(json_data.get("instructions", []))])
57
 
 
58
  display_text = (
59
  f"🍽️ RECIPE: {title}\n"
60
  f"================================\n"
 
64
  f"🍳 INSTRUCTIONS:\n{instructions}"
65
  )
66
 
67
+ # Text for the AI to search with
68
  search_query = f"{title} {cuisine} {ingredients} {instructions}"
 
69
  return display_text, search_query
70
 
71
  def find_similar_recipes(user_query_text):
72
+ """Finds recipes using API embeddings + Scikit-Learn (No Torch)."""
73
 
74
+ # 1. Get Embedding from API
 
75
  instruction = "Represent this recipe for retrieving similar dishes: "
76
+ query_vec = get_embedding_via_api(instruction + user_query_text)
77
 
78
+ if query_vec is None:
79
+ return "❌ Error: Could not reach Hugging Face API for embeddings."
80
+
81
+ # Ensure query_vec is 2D for scikit-learn (1, 384)
82
+ if len(query_vec.shape) == 1:
83
+ query_vec = query_vec.reshape(1, -1)
84
+
85
+ # 2. Calculate Cosine Similarity (using Numpy/Scikit, very fast)
86
+ # stored_embeddings is (10000, 384)
87
+ scores = cosine_similarity(query_vec, stored_embeddings)[0]
88
 
89
+ # 3. Get Top 3 Indices using Numpy
90
+ # argsort gives lowest first, so we take last 3 and reverse
91
+ top_indices = scores.argsort()[-3:][::-1]
92
 
93
  recommendations = ""
94
 
95
+ for idx in top_indices:
96
+ score = scores[idx]
97
  row = df_recipes.iloc[idx]
98
 
99
  rec_title = row['Title']
100
+ rec_desc = str(row['Raw_Output']) # Safe conversion
 
 
101
  rec_desc_short = rec_desc[:200] + "..." if len(rec_desc) > 200 else rec_desc
102
 
103
  recommendations += (
 
110
  return recommendations
111
 
112
  def magic_pipeline(image_path):
113
+ # Step 1: Image -> Text (API)
114
  digitizer = RecipeDigitalizerPipeline()
115
  json_result = digitizer.run_pipeline(image_path)
116
 
117
+ # Step 2: Format
118
  readable_text, query_text = format_recipe_text(json_result)
119
 
120
+ # Step 3: Text -> Similarity (API)
121
  if not query_text:
122
+ return readable_text, "Could not search."
123
 
124
  similar_recipes_text = find_similar_recipes(query_text)
125
 
 
127
 
128
 
129
  # ==========================================
130
+ # 3. GRADIO UI
131
  # ==========================================
132
 
133
  custom_css = """
 
136
 
137
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
138
 
139
+ gr.Markdown("# πŸ‘΅ Legacy Kitchen (Cloud API Edition)")
140
+ gr.Markdown("Upload a handwritten recipe. We digitize it and find matches using Hugging Face Serverless API.")
141
 
142
  with gr.Row():
143
  with gr.Column():
 
145
  submit_btn = gr.Button("✨ Digitize & Find Similar", variant="primary")
146
 
147
  with gr.Column():
 
148
  output_digitized = gr.Textbox(label="πŸ“– Digitized Recipe", lines=10)
 
 
149
  output_recommendations = gr.Textbox(label="πŸ₯— 3 Similar Recipes Found", lines=10)
150
 
 
151
  submit_btn.click(
152
  fn=magic_pipeline,
153
  inputs=input_image,
154
  outputs=[output_digitized, output_recommendations]
155
  )
156
 
 
157
  if __name__ == "__main__":
158
  demo.launch()