TheHuriShow committed on
Commit
1a67f51
·
verified ·
1 Parent(s): 07e3484

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -18
app.py CHANGED
@@ -20,7 +20,7 @@ def extract_title_and_ingredients(sample):
20
 
21
  def extract_each_feature(sample):
22
  """
23
- Extract each feature of a recipe from a sample, and clean up potential duplications.
24
  """
25
  full_text = sample['input']
26
 
@@ -60,12 +60,10 @@ print("Loading embedding model...")
60
  model_name = "all-MiniLM-L6-v2"
61
  embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
62
 
63
- # Compute embeddings for the dataset
64
  print("Generating embeddings...")
65
  embeddings = embedding_model.encode(dataset['text_for_embedding'], show_progress_bar=True)
66
  embeddings = np.array(embeddings, dtype=np.float32)
67
 
68
- # Build FAISS index for similarity search
69
  print("Building FAISS index...")
70
  dimension = embeddings.shape[1]
71
  index = faiss.IndexFlatL2(dimension)
@@ -74,7 +72,7 @@ print(f"Index is ready. Total vectors in index: {index.ntotal}")
74
 
75
  # --- 3. SYNTHETIC GENERATION ---
76
  print("Loading generative model...")
77
- generator = pipeline('text-generation', model='gpt2')
78
 
79
  def get_recommendations_and_generate(query_ingredients, k=3):
80
  # 1. Get Recommendations
@@ -92,20 +90,46 @@ def get_recommendations_and_generate(query_ingredients, k=3):
92
  }
93
  results.append(recipe)
94
 
95
- # 2. Generate a new recipe idea
96
- prompt = f"Create a simple recipe title and a list of ingredients using: {query_ingredients}."
97
- generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
98
-
99
- # Clean up generated text to be more readable
100
- generated_recipe_parts = generated_text.split("Ingredients:")
101
- generated_title = generated_recipe_parts[0].replace(prompt, "").strip()
102
- generated_ingredients = generated_recipe_parts[1].strip() if len(generated_recipe_parts) > 1 else "Could not determine ingredients."
103
-
104
- generated_recipe = {
105
- "title": generated_title,
106
- "ingredients": generated_ingredients,
107
- "directions": "This is an AI-generated idea. Directions are not provided."
108
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  return results[0], results[1], results[2], generated_recipe
111
 
 
20
 
21
  def extract_each_feature(sample):
22
  """
23
+ FIXED: Extract each feature of a recipe from a sample and clean up potential duplications.
24
  """
25
  full_text = sample['input']
26
 
 
60
  model_name = "all-MiniLM-L6-v2"
61
  embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
62
 
 
63
  print("Generating embeddings...")
64
  embeddings = embedding_model.encode(dataset['text_for_embedding'], show_progress_bar=True)
65
  embeddings = np.array(embeddings, dtype=np.float32)
66
 
 
67
  print("Building FAISS index...")
68
  dimension = embeddings.shape[1]
69
  index = faiss.IndexFlatL2(dimension)
 
72
 
73
  # --- 3. SYNTHETIC GENERATION ---
74
  print("Loading generative model...")
75
+ generator = pipeline('text-generation', model='distilgpt2')
76
 
77
  def get_recommendations_and_generate(query_ingredients, k=3):
78
  # 1. Get Recommendations
 
90
  }
91
  results.append(recipe)
92
 
93
+ # 2. Generate a new recipe from a structured template prompt (a format skeleton with placeholders, not worked few-shot examples)
94
+ prompt = f"""Create a full recipe including a title, ingredients, and directions based on the following items: {query_ingredients}.
95
+
96
+ ### Title:
97
+ [Recipe Title]
98
+
99
+ ### Ingredients:
100
+ - [Ingredient 1]
101
+ - [Ingredient 2]
102
+ - [Ingredient 3]
103
+
104
+ ### Directions:
105
+ 1. [Step 1]
106
+ 2. [Step 2]
107
+ 3. [Step 3]
108
+ ---
109
+ Recipe:
110
+ ### Title:
111
+ """
112
+ # Generate the recipe text
113
+ generated_outputs = generator(prompt, max_new_tokens=200, num_return_sequences=1)
114
+ generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
115
+
116
+ # 3. Parse the generated text into a structured format
117
+ try:
118
+ title_part, rest = generated_text.split("### Ingredients:", 1)
119
+ ingredients_part, directions_part = rest.split("### Directions:", 1)
120
+
121
+ generated_recipe = {
122
+ "title": title_part.strip(),
123
+ "ingredients": ingredients_part.strip(),
124
+ "directions": directions_part.strip()
125
+ }
126
+ except ValueError:
127
+ # Fallback if the model doesn't follow the format perfectly
128
+ generated_recipe = {
129
+ "title": "AI Generated Recipe",
130
+ "ingredients": "Could not determine ingredients.",
131
+ "directions": generated_text
132
+ }
133
 
134
  return results[0], results[1], results[2], generated_recipe
135