Liori25 committed on
Commit
6bd2165
·
verified ·
1 Parent(s): 944703d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -15
app.py CHANGED
@@ -5,7 +5,9 @@ import numpy as np
5
  import os
6
  from huggingface_hub import InferenceClient
7
  from sklearn.metrics.pairwise import cosine_similarity
8
- from recipe_pipeline import RecipeDigitalizerPipeline # Ensure your pipeline file is named recipe_pipeline.py
 
 
9
 
10
  # ==========================================
11
  # 1. SETUP API CLIENT
@@ -17,14 +19,23 @@ client = InferenceClient(token=os.getenv("HF_TOKEN"))
17
  print("⏳ Loading Datasets...")
18
 
19
  # Load the Pre-computed Embeddings
20
- # We use standard pickle loading. Since we saved numpy arrays, we don't need torch.
21
- with open('recipe_embeddings.pkl', 'rb') as f:
22
- data_bundle = pickle.load(f)
23
- stored_embeddings = data_bundle['embeddings'] # This is a numpy matrix
24
-
25
- # Load the CSV Dataset
26
- df_recipes = pd.read_csv('RecipeData_10K.csv')
27
- print("✅ Resources Loaded Successfully!")
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  # ==========================================
@@ -36,8 +47,7 @@ def get_embedding_via_api(text):
36
  try:
37
  # We use the feature_extraction task
38
  response = client.feature_extraction(text, model=API_MODEL)
39
- # The API returns a list of floats (or list of list). We convert to numpy.
40
- # Note: BGE-Small is 384 dimensions.
41
  return np.array(response)
42
  except Exception as e:
43
  print(f"API Error: {e}")
@@ -70,7 +80,9 @@ def format_recipe_text(json_data):
70
 
71
  def find_similar_recipes(user_query_text):
72
  """Finds recipes using API embeddings + Scikit-Learn (No Torch)."""
73
-
 
 
74
  # 1. Get Embedding from API
75
  instruction = "Represent this recipe for retrieving similar dishes: "
76
  query_vec = get_embedding_via_api(instruction + user_query_text)
@@ -83,7 +95,6 @@ def find_similar_recipes(user_query_text):
83
  query_vec = query_vec.reshape(1, -1)
84
 
85
  # 2. Calculate Cosine Similarity (using Numpy/Scikit, very fast)
86
- # stored_embeddings is (10000, 384)
87
  scores = cosine_similarity(query_vec, stored_embeddings)[0]
88
 
89
  # 3. Get Top 3 Indices using Numpy
@@ -111,8 +122,11 @@ def find_similar_recipes(user_query_text):
111
 
112
  def magic_pipeline(image_path):
113
  # Step 1: Image -> Text (API)
114
- digitizer = RecipeDigitalizerPipeline()
115
- json_result = digitizer.run_pipeline(image_path)
 
 
 
116
 
117
  # Step 2: Format
118
  readable_text, query_text = format_recipe_text(json_result)
 
5
  import os
6
  from huggingface_hub import InferenceClient
7
  from sklearn.metrics.pairwise import cosine_similarity
8
+
9
+ # --- FIX: Import from YOUR file name (IO_pipeline) ---
10
+ from IO_pipeline import RecipeDigitalizerPipeline
11
 
12
  # ==========================================
13
  # 1. SETUP API CLIENT
 
19
  print("⏳ Loading Datasets...")
20
 
21
  # Load the Pre-computed Embeddings
22
+ # We use standard pickle loading.
23
+ try:
24
+ with open('recipe_embeddings.pkl', 'rb') as f:
25
+ data_bundle = pickle.load(f)
26
+ # Ensure we get the matrix (handle both dict and list formats)
27
+ if isinstance(data_bundle, dict):
28
+ stored_embeddings = data_bundle['embeddings']
29
+ else:
30
+ stored_embeddings = data_bundle
31
+
32
+ # Load the CSV Dataset
33
+ df_recipes = pd.read_csv('RecipeData_10K.csv')
34
+ print("✅ Resources Loaded Successfully!")
35
+ except FileNotFoundError as e:
36
+ print(f"❌ CRITICAL ERROR: Missing file {e.filename}")
37
+ stored_embeddings = None
38
+ df_recipes = pd.DataFrame()
39
 
40
 
41
  # ==========================================
 
47
  try:
48
  # We use the feature_extraction task
49
  response = client.feature_extraction(text, model=API_MODEL)
50
+ # The API returns a list of floats. We convert to numpy.
 
51
  return np.array(response)
52
  except Exception as e:
53
  print(f"API Error: {e}")
 
80
 
81
  def find_similar_recipes(user_query_text):
82
  """Finds recipes using API embeddings + Scikit-Learn (No Torch)."""
83
+ if stored_embeddings is None:
84
+ return "❌ Error: Embeddings file not loaded."
85
+
86
  # 1. Get Embedding from API
87
  instruction = "Represent this recipe for retrieving similar dishes: "
88
  query_vec = get_embedding_via_api(instruction + user_query_text)
 
95
  query_vec = query_vec.reshape(1, -1)
96
 
97
  # 2. Calculate Cosine Similarity (using Numpy/Scikit, very fast)
 
98
  scores = cosine_similarity(query_vec, stored_embeddings)[0]
99
 
100
  # 3. Get Top 3 Indices using Numpy
 
122
 
123
  def magic_pipeline(image_path):
124
  # Step 1: Image -> Text (API)
125
+ try:
126
+ digitizer = RecipeDigitalizerPipeline()
127
+ json_result = digitizer.run_pipeline(image_path)
128
+ except Exception as e:
129
+ return f"Error in IO_pipeline: {e}", ""
130
 
131
  # Step 2: Format
132
  readable_text, query_text = format_recipe_text(json_result)