Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from datasets import load_dataset
|
| 3 |
-
from sentence_transformers import SentenceTransformer
|
| 4 |
import faiss
|
| 5 |
import numpy as np
|
| 6 |
from transformers import pipeline
|
|
@@ -51,10 +51,11 @@ def extract_each_feature(sample):
|
|
| 51 |
"directions": directions,
|
| 52 |
}
|
| 53 |
|
|
|
|
| 54 |
# Apply preprocessing
|
| 55 |
dataset = dataset.map(extract_each_feature)
|
| 56 |
|
| 57 |
-
# --- 2. EMBEDDING AND RECOMMENDATION ENGINE
|
| 58 |
print("Loading embedding model...")
|
| 59 |
model_name = "all-MiniLM-L6-v2"
|
| 60 |
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
|
|
@@ -62,16 +63,15 @@ embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
|
|
| 62 |
index_file = "recipe_index.faiss"
|
| 63 |
|
| 64 |
print(f"Loading FAISS index from {index_file}...")
|
| 65 |
-
# Load the pre-computed FAISS index
|
| 66 |
index = faiss.read_index(index_file)
|
| 67 |
print(f"Index is ready. Total vectors in index: {index.ntotal}")
|
| 68 |
|
| 69 |
-
# --- 3. SYNTHETIC GENERATION (IMPROVED
|
| 70 |
print("Loading generative model...")
|
| 71 |
generator = pipeline('text-generation', model='gpt2-medium')
|
| 72 |
|
| 73 |
def get_recommendations_and_generate(query_ingredients, k=3):
|
| 74 |
-
#
|
| 75 |
query_vector = embedding_model.encode([query_ingredients])
|
| 76 |
query_vector = np.array(query_vector, dtype=np.float32)
|
| 77 |
distances, indices = index.search(query_vector, k)
|
|
@@ -86,77 +86,42 @@ def get_recommendations_and_generate(query_ingredients, k=3):
|
|
| 86 |
}
|
| 87 |
results.append(recipe)
|
| 88 |
|
| 89 |
-
# Defensive check: Ensure there are always 3 recommendations
|
| 90 |
while len(results) < 3:
|
| 91 |
results.append({"title": "No recipe found", "ingredients": "", "directions": ""})
|
| 92 |
-
|
| 93 |
-
# --- 2. Generate and Parse a New Recipe (with error handling) ---
|
| 94 |
-
generated_recipe = {
|
| 95 |
-
"title": "AI Recipe Generation Failed",
|
| 96 |
-
"ingredients": "The model could not generate a recipe for these ingredients.",
|
| 97 |
-
"directions": "Please try a different combination of ingredients."
|
| 98 |
-
}
|
| 99 |
-
|
| 100 |
-
try:
|
| 101 |
-
prompt = f"Create a simple and delicious recipe using the following ingredients: {query_ingredients}."
|
| 102 |
-
|
| 103 |
-
generated_outputs = generator(prompt, max_new_tokens=250, num_return_sequences=1, pad_token_id=50256)
|
| 104 |
-
|
| 105 |
-
# Check if the model returned a valid output
|
| 106 |
-
if not generated_outputs or 'generated_text' not in generated_outputs[0]:
|
| 107 |
-
raise ValueError("Model did not return generated_text.")
|
| 108 |
-
|
| 109 |
-
generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
|
| 110 |
-
|
| 111 |
-
# Parsing logic
|
| 112 |
-
lines = generated_text.split('\n')
|
| 113 |
-
if not lines or lines[0] == "":
|
| 114 |
-
raise ValueError("Generated text is empty.")
|
| 115 |
-
|
| 116 |
-
title = lines[0].strip()
|
| 117 |
-
|
| 118 |
-
ing_index = -1
|
| 119 |
-
dir_index = -1
|
| 120 |
-
for i, line in enumerate(lines):
|
| 121 |
-
if "ingredients" in line.lower() and ing_index == -1:
|
| 122 |
-
ing_index = i
|
| 123 |
-
if "directions" in line.lower() and dir_index == -1:
|
| 124 |
-
dir_index = i
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
elif dir_index != -1:
|
| 133 |
-
directions = "\n".join(lines[dir_index+1:]).strip()
|
| 134 |
-
ingredients = "Not provided."
|
| 135 |
-
else:
|
| 136 |
-
ingredients = "Not provided."
|
| 137 |
-
directions = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
|
| 138 |
-
|
| 139 |
-
generated_recipe = {
|
| 140 |
-
"title": title,
|
| 141 |
-
"ingredients": ingredients,
|
| 142 |
-
"directions": directions
|
| 143 |
-
}
|
| 144 |
-
|
| 145 |
-
except Exception as e:
|
| 146 |
-
print(f"An error occurred in get_recommendations_and_generate: {e}")
|
| 147 |
-
# The generated_recipe dictionary is already set to a default error message
|
| 148 |
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
# --- 4. GRADIO USER INTERFACE ---
|
| 152 |
def format_recipe(recipe):
|
|
|
|
| 153 |
if not recipe or not recipe['title']:
|
| 154 |
return "### No recipe found."
|
| 155 |
return f"### {recipe['title']}\n**Ingredients:**\n{recipe['ingredients']}\n\n**Directions:**\n{recipe['directions']}"
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
def recipe_wizard(ingredients):
|
| 158 |
-
rec1, rec2, rec3,
|
| 159 |
-
return format_recipe(rec1), format_recipe(rec2), format_recipe(rec3),
|
| 160 |
|
| 161 |
end_time = time.time()
|
| 162 |
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from datasets import load_dataset
|
| 3 |
+
from sentence_transformers import SentenceTransformer, util
|
| 4 |
import faiss
|
| 5 |
import numpy as np
|
| 6 |
from transformers import pipeline
|
|
|
|
| 51 |
"directions": directions,
|
| 52 |
}
|
| 53 |
|
| 54 |
+
|
| 55 |
# Apply preprocessing
|
| 56 |
dataset = dataset.map(extract_each_feature)
|
| 57 |
|
| 58 |
+
# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
|
| 59 |
print("Loading embedding model...")
|
| 60 |
model_name = "all-MiniLM-L6-v2"
|
| 61 |
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
|
|
|
|
| 63 |
index_file = "recipe_index.faiss"
|
| 64 |
|
| 65 |
print(f"Loading FAISS index from {index_file}...")
|
|
|
|
| 66 |
index = faiss.read_index(index_file)
|
| 67 |
print(f"Index is ready. Total vectors in index: {index.ntotal}")
|
| 68 |
|
| 69 |
+
# --- 3. SYNTHETIC GENERATION (IMPROVED) ---
|
| 70 |
print("Loading generative model...")
|
| 71 |
generator = pipeline('text-generation', model='gpt2-medium')
|
| 72 |
|
| 73 |
def get_recommendations_and_generate(query_ingredients, k=3):
|
| 74 |
+
# 1. Get Recommendations
|
| 75 |
query_vector = embedding_model.encode([query_ingredients])
|
| 76 |
query_vector = np.array(query_vector, dtype=np.float32)
|
| 77 |
distances, indices = index.search(query_vector, k)
|
|
|
|
| 86 |
}
|
| 87 |
results.append(recipe)
|
| 88 |
|
|
|
|
| 89 |
while len(results) < 3:
|
| 90 |
results.append({"title": "No recipe found", "ingredients": "", "directions": ""})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
# 2. Generate 10 new recipe ideas
|
| 93 |
+
prompt = f"Create a simple and delicious recipe using the following ingredients: {query_ingredients}."
|
| 94 |
+
generated_outputs = generator(prompt, max_new_tokens=200, num_return_sequences=10, pad_token_id=50256)
|
| 95 |
+
|
| 96 |
+
# 3. Find the best recipe out of the 10 generated
|
| 97 |
+
generated_texts = [output['generated_text'].replace(prompt, "").strip() for output in generated_outputs]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
+
# Embed all 10 generated texts
|
| 100 |
+
generated_embeddings = embedding_model.encode(generated_texts)
|
| 101 |
+
|
| 102 |
+
# Calculate cosine similarity between the user's query and each generated text
|
| 103 |
+
similarities = util.cos_sim(query_vector, generated_embeddings)
|
| 104 |
+
|
| 105 |
+
# Find the index of the most similar generated recipe
|
| 106 |
+
best_recipe_index = np.argmax(similarities)
|
| 107 |
+
best_generated_recipe = generated_texts[best_recipe_index]
|
| 108 |
+
|
| 109 |
+
return results[0], results[1], results[2], best_generated_recipe
|
| 110 |
|
| 111 |
# --- 4. GRADIO USER INTERFACE ---
|
| 112 |
def format_recipe(recipe):
    """Render a recommended recipe as a Markdown snippet.

    Args:
        recipe: Mapping with ``title``, ``ingredients`` and ``directions``
            entries, or a falsy value when no recipe is available.

    Returns:
        A Markdown string with the title as a level-3 heading followed by
        the ingredients and directions, or ``"### No recipe found."`` when
        the recipe is missing or has no (or an empty) title.
    """
    # Use .get() so a partially populated record degrades to empty sections
    # instead of raising KeyError inside the UI callback.
    if not recipe or not recipe.get('title'):
        return "### No recipe found."
    ingredients = recipe.get('ingredients', '')
    directions = recipe.get('directions', '')
    return (
        f"### {recipe['title']}\n"
        f"**Ingredients:**\n{ingredients}\n\n"
        f"**Directions:**\n{directions}"
    )
|
| 117 |
|
| 118 |
+
def format_generated_recipe(recipe_text):
    """Return the AI-generated recipe text for display.

    Args:
        recipe_text: Text selected from the generator output.

    Returns:
        ``recipe_text`` unchanged when it has visible content; otherwise a
        short Markdown placeholder so the output is never left blank.
    """
    # Only strings are checked for whitespace-only content; any other value
    # is judged by its truthiness and otherwise passed through untouched.
    visible = recipe_text.strip() if isinstance(recipe_text, str) else recipe_text
    if not visible:
        return "### No recipe generated."
    return recipe_text
|
| 121 |
+
|
| 122 |
def recipe_wizard(ingredients):
    """Produce the four display strings for an ingredient query.

    Calls ``get_recommendations_and_generate`` with the query and formats
    each of its four results.

    Args:
        ingredients: The ingredient query, passed through unchanged.

    Returns:
        A 4-tuple: the three recommendations formatted by ``format_recipe``,
        followed by the generated text formatted by
        ``format_generated_recipe``.
    """
    first, second, third, generated_text = get_recommendations_and_generate(ingredients)
    recommendation_cards = tuple(
        format_recipe(candidate) for candidate in (first, second, third)
    )
    return (*recommendation_cards, format_generated_recipe(generated_text))
|
| 125 |
|
| 126 |
end_time = time.time()
|
| 127 |
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")
|