TheHuriShow committed on
Commit
f842f30
·
verified ·
1 Parent(s): 1a67f51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -50
app.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
6
  from transformers import pipeline
7
  import time
8
 
9
- # --- 1. DATA LOADING AND PREPROCESSING ---
10
  print("===== Application Startup =====")
11
  start_time = time.time()
12
 
@@ -20,29 +20,33 @@ def extract_title_and_ingredients(sample):
20
 
21
  def extract_each_feature(sample):
22
  """
23
- FIXED: Extract each feature of a recipe from a sample and clean up potential duplications.
24
  """
25
  full_text = sample['input']
26
 
 
27
  title = full_text[:full_text.find("\\n")]
28
 
29
- # Find the start and end of the ingredients block
30
- ingredients_start = full_text.find("Ingredients:")
31
- directions_start = full_text.find("Directions:")
32
-
33
- # Extract ingredients cleanly
34
- if ingredients_start != -1 and directions_start != -1:
35
- ingredients = full_text[ingredients_start + len("Ingredients:"):directions_start].strip()
36
- else:
37
- ingredients = "Not available"
38
-
39
- # Extract directions and clean them
40
- if directions_start != -1:
41
- directions_raw = full_text[directions_start + len("Directions:"):].strip()
42
- # FIX: Split by "Ingredients:" to remove any duplicated content
43
- directions = directions_raw.split("Ingredients:")[0].strip()
44
- else:
45
- directions = "Not available"
 
 
 
46
 
47
  return {
48
  "title": title,
@@ -70,9 +74,10 @@ index = faiss.IndexFlatL2(dimension)
70
  index.add(embeddings)
71
  print(f"Index is ready. Total vectors in index: {index.ntotal}")
72
 
73
- # --- 3. SYNTHETIC GENERATION ---
74
  print("Loading generative model...")
75
- generator = pipeline('text-generation', model='distilgpt2')
 
76
 
77
  def get_recommendations_and_generate(query_ingredients, k=3):
78
  # 1. Get Recommendations
@@ -90,43 +95,54 @@ def get_recommendations_and_generate(query_ingredients, k=3):
90
  }
91
  results.append(recipe)
92
 
93
- # 2. Generate a new recipe with a structured "few-shot" prompt
94
- prompt = f"""Create a full recipe including a title, ingredients, and directions based on the following items: {query_ingredients}.
95
-
96
- ### Title:
97
- [Recipe Title]
98
-
99
- ### Ingredients:
100
- - [Ingredient 1]
101
- - [Ingredient 2]
102
- - [Ingredient 3]
103
-
104
- ### Directions:
105
- 1. [Step 1]
106
- 2. [Step 2]
107
- 3. [Step 3]
108
- ---
109
- Recipe:
110
- ### Title:
111
  """
112
  # Generate the recipe text
113
- generated_outputs = generator(prompt, max_new_tokens=200, num_return_sequences=1)
114
  generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
115
 
116
- # 3. Parse the generated text into a structured format
117
  try:
118
- title_part, rest = generated_text.split("### Ingredients:", 1)
119
- ingredients_part, directions_part = rest.split("### Directions:", 1)
120
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  generated_recipe = {
122
- "title": title_part.strip(),
123
- "ingredients": ingredients_part.strip(),
124
- "directions": directions_part.strip()
125
  }
126
- except ValueError:
127
- # Fallback if the model doesn't follow the format perfectly
128
  generated_recipe = {
129
- "title": "AI Generated Recipe",
130
  "ingredients": "Could not determine ingredients.",
131
  "directions": generated_text
132
  }
 
6
  from transformers import pipeline
7
  import time
8
 
9
+ # --- 1. DATA LOADING AND PREPROCESSING (FINAL FIX) ---
10
  print("===== Application Startup =====")
11
  start_time = time.time()
12
 
 
20
 
21
  def extract_each_feature(sample):
22
  """
23
+ FIXED: More robustly extracts recipe features and cleans duplicated content.
24
  """
25
  full_text = sample['input']
26
 
27
+ # Extract Title
28
  title = full_text[:full_text.find("\\n")]
29
 
30
+ # Extract Ingredients
31
+ ingredients = "Not available"
32
+ if "Ingredients:" in full_text:
33
+ start_ing = full_text.find("Ingredients:") + len("Ingredients:")
34
+ end_ing = full_text.find("Directions:")
35
+ if end_ing > start_ing:
36
+ ingredients = full_text[start_ing:end_ing].strip()
37
+
38
+ # Extract and clean Directions
39
+ directions = "Not available"
40
+ if "Directions:" in full_text:
41
+ start_dir = full_text.find("Directions:") + len("Directions:")
42
+ # Take the text after the first "Directions:"
43
+ temp_directions = full_text[start_dir:].strip()
44
+ # If "Ingredients:" appears again, it's a duplicate. Cut it off.
45
+ if "Ingredients:" in temp_directions:
46
+ end_dir = temp_directions.find("Ingredients:")
47
+ directions = temp_directions[:end_dir].strip()
48
+ else:
49
+ directions = temp_directions
50
 
51
  return {
52
  "title": title,
 
74
  index.add(embeddings)
75
  print(f"Index is ready. Total vectors in index: {index.ntotal}")
76
 
77
+ # --- 3. SYNTHETIC GENERATION (IMPROVED MODEL AND PROMPT) ---
78
  print("Loading generative model...")
79
+ # Switched back to gpt2 for better instruction following
80
+ generator = pipeline('text-generation', model='gpt2')
81
 
82
  def get_recommendations_and_generate(query_ingredients, k=3):
83
  # 1. Get Recommendations
 
95
  }
96
  results.append(recipe)
97
 
98
+ # 2. Generate a new recipe with a better "few-shot" prompt
99
+ prompt = f"""Given the ingredients: {query_ingredients}, create a complete recipe.
100
+
101
+ ### Title ###
102
+ [A creative recipe title]
103
+
104
+ ### Ingredients ###
105
+ - [List of ingredients]
106
+
107
+ ### Directions ###
108
+ 1. [First step]
109
+ 2. [Second step]
110
+ 3. [And so on...]
111
+
112
+ ### Title ###
 
 
 
113
  """
114
  # Generate the recipe text
115
+ generated_outputs = generator(prompt, max_new_tokens=250, num_return_sequences=1, pad_token_id=50256)
116
  generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
117
 
118
+ # 3. More robustly parse the generated text
119
  try:
120
+ title = "AI Generated Recipe"
121
+ ingredients = ""
122
+ directions = ""
123
+
124
+ if "### Ingredients ###" in generated_text:
125
+ parts = generated_text.split("### Ingredients ###")
126
+ title = parts[0].strip()
127
+ rest = parts[1]
128
+ if "### Directions ###" in rest:
129
+ dir_parts = rest.split("### Directions ###")
130
+ ingredients = dir_parts[0].strip()
131
+ directions = dir_parts[1].strip()
132
+ else:
133
+ ingredients = rest.strip()
134
+ else:
135
+ directions = generated_text # Fallback
136
+
137
  generated_recipe = {
138
+ "title": title,
139
+ "ingredients": ingredients,
140
+ "directions": directions
141
  }
142
+ except Exception as e:
143
+ print(f"Error parsing generated text: {e}")
144
  generated_recipe = {
145
+ "title": "AI Generated Recipe (Parsing Error)",
146
  "ingredients": "Could not determine ingredients.",
147
  "directions": generated_text
148
  }