Spaces:
Sleeping
Sleeping
File size: 6,129 Bytes
37d9971 68d4109 be37158 83e7fb5 69a6292 68d4109 83e7fb5 07e3484 83e7fb5 07e3484 83e7fb5 07e3484 f1f59f6 69a6292 07e3484 f842f30 69a6292 07e3484 69a6292 07e3484 68d4109 83e7fb5 be37158 07e3484 68d4109 83e7fb5 69a6292 68d4109 69a6292 68d4109 be37158 07e3484 e4702ce 68d4109 be37158 2e29b2f 68d4109 07e3484 68d4109 babdea3 2e29b2f f1f59f6 be37158 68d4109 be37158 83e7fb5 68d4109 be37158 68d4109 be37158 f1f59f6 be37158 68d4109 be37158 83e7fb5 68d4109 07e3484 68d4109 07e3484 68d4109 f1f59f6 68d4109 83e7fb5 68d4109 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
import faiss
import numpy as np
from transformers import pipeline
import time
# --- 1. DATA LOADING AND PREPROCESSING ---
print("===== Application Startup =====")
# Startup timestamp; read again once loading has finished to report the elapsed time.
start_time = time.time()
# Load dataset and limit to the first 20,000 rows of the "train" split
# (the "[:20000]" slice in the split string does the limiting).
dataset = load_dataset("corbt/all-recipes", split="train[:20000]")
# Preprocessing functions to extract features from the raw text
def extract_each_feature(sample):
    """Split one raw recipe record into title, ingredients and directions.

    Args:
        sample: Mapping with an ``'input'`` key holding the full recipe text.
            The first line is taken as the title; the ``"Ingredients:"`` and
            ``"Directions:"`` markers delimit the other two sections.

    Returns:
        dict with the keys ``"title"``, ``"ingredients"`` and ``"directions"``.
        ``"ingredients"`` is ``"Not available"`` unless both markers are
        present; ``"directions"`` is ``"Not available"`` when the
        ``"Directions:"`` marker is missing.
    """
    ingredients_marker = "Ingredients:"
    directions_marker = "Directions:"

    full_text = sample['input']

    # partition() returns the whole text when there is no newline. Slicing
    # with find() would use -1 as the end index in that case and silently
    # drop the last character of the title.
    title = full_text.partition("\n")[0]

    ingredients = "Not available"
    directions = "Not available"

    ingredients_start_index = full_text.find(ingredients_marker)
    directions_start_index = full_text.find(directions_marker)

    # Ingredients are whatever sits between the two markers.
    if ingredients_start_index != -1 and directions_start_index != -1:
        ingredients_begin = ingredients_start_index + len(ingredients_marker)
        ingredients = full_text[ingredients_begin:directions_start_index].strip()

    if directions_start_index != -1:
        directions_begin = directions_start_index + len(directions_marker)
        directions_raw = full_text[directions_begin:].strip()

        # If either marker shows up again after the directions header, cut
        # the directions at the earliest repeat so trailing sections are
        # not included.
        repeat_positions = (
            directions_raw.find(ingredients_marker),
            directions_raw.find(directions_marker),
        )
        cut_off_indices = [idx for idx in repeat_positions if idx != -1]
        if cut_off_indices:
            directions = directions_raw[:min(cut_off_indices)].strip()
        else:
            directions = directions_raw

    return {
        "title": title,
        "ingredients": ingredients,
        "directions": directions,
    }
# Apply preprocessing: run extract_each_feature over the dataset so the
# "title", "ingredients" and "directions" fields can be read per row later.
dataset = dataset.map(extract_each_feature)
# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
print("Loading embedding model...")
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
# The FAISS index is read from disk rather than built here, so this file must
# already exist next to the app.
# NOTE(review): presumably built with the same embedding model over the same
# 20,000 rows, so that index ids line up with dataset rows - confirm.
index_file = "recipe_index.faiss"
print(f"Loading FAISS index from {index_file}...")
index = faiss.read_index(index_file)
print(f"Index is ready. Total vectors in index: {index.ntotal}")
# --- 3. SYNTHETIC GENERATION (IMPROVED) ---
print("Loading generative model...")
# Text-generation pipeline backed by the 'gpt2' checkpoint.
generator = pipeline('text-generation', model='gpt2')
def get_recommendations_and_generate(query_ingredients, k=3):
    """Recommend existing recipes and generate one new recipe idea.

    Args:
        query_ingredients: Free-text ingredient list typed by the user.
        k: Number of nearest neighbours requested from the FAISS index.
            Only the first three results are returned.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_text)``. Each ``rec`` is a
        dict with ``"title"``, ``"ingredients"`` and ``"directions"``; slots
        without a match hold a "No recipe found" placeholder.
        ``generated_text`` is whichever of 10 generated candidates is most
        similar to the query.
    """
    # 1. Get Recommendations
    query_vector = embedding_model.encode([query_ingredients])
    query_vector = np.array(query_vector, dtype=np.float32)
    _distances, indices = index.search(query_vector, k)

    total_rows = len(dataset)
    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)
        # FAISS fills missing neighbours with -1, which would wrap around to
        # the last dataset row. The index may also hold more vectors than
        # the loaded dataset slice. Skip anything that is not a valid row.
        if not 0 <= idx < total_rows:
            continue
        row = dataset[idx]  # fetch the row once instead of once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # The caller always unpacks three recommendations, so pad if needed.
    while len(results) < 3:
        results.append({"title": "No recipe found", "ingredients": "", "directions": ""})

    # 2. Generate 10 new recipe ideas with a simpler, more direct prompt
    prompt = f"Write a complete recipe that includes a title, a list of ingredients, and step-by-step directions. The recipe must use the following ingredients: {query_ingredients}."
    # max_new_tokens is kept small for speed. Sampling is requested
    # explicitly because several return sequences without beam search
    # require it; previously this relied on the model config.
    generated_outputs = generator(
        prompt,
        max_new_tokens=180,
        num_return_sequences=10,
        do_sample=True,
        pad_token_id=50256,
    )

    # 3. Find the best recipe out of the 10 generated.
    # The pipeline output includes the prompt; remove it.
    generated_texts = [
        output['generated_text'].replace(prompt, "").strip()
        for output in generated_outputs
    ]
    generated_embeddings = embedding_model.encode(generated_texts)
    # Cosine similarity between the user's query and each generated text.
    similarities = util.cos_sim(query_vector, generated_embeddings)
    # cos_sim returns a tensor; take its own argmax and convert to a plain
    # int so it can index a Python list.
    best_recipe_index = int(similarities.argmax())
    best_generated_recipe = generated_texts[best_recipe_index]

    return results[0], results[1], results[2], best_generated_recipe
# --- 4. GRADIO USER INTERFACE ---
def format_recipe(recipe):
    """Render a recommended recipe dict as a Markdown snippet.

    A missing recipe, or one with an empty title, yields the
    "No recipe found." heading.
    """
    has_title = bool(recipe) and bool(recipe['title'])
    if has_title:
        return (
            f"### {recipe['title']}\n"
            f"**Ingredients:**\n{recipe['ingredients']}\n\n"
            f"**Directions:**\n{recipe['directions']}"
        )
    return "### No recipe found."
def format_generated_recipe(recipe_text):
    """Return the AI-generated recipe unchanged.

    The generated text is shown as plain text, so no Markdown is added.
    """
    return recipe_text
def recipe_wizard(ingredients):
    """Gradio callback: turn an ingredient string into four display values.

    Returns the three formatted recommendations followed by the formatted
    generated recipe, in the order of the UI's output components.
    """
    *recommended, generated_text = get_recommendations_and_generate(ingredients)
    first, second, third = recommended
    return (
        format_recipe(first),
        format_recipe(second),
        format_recipe(third),
        format_generated_recipe(generated_text),
    )
# Report how long the startup work took (measured from start_time).
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")

    # Input row: ingredient text box and the submit button.
    with gr.Row():
        ingredient_input = gr.Textbox(label="Your Ingredients", placeholder="e.g., chicken, rice, tomatoes, garlic")
        submit_btn = gr.Button("Get Recipes")

    # Output row: recommendations in the wider column, generated idea beside them.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Recommended Recipes")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("### ✨ AI-Generated Idea")
            # Textbox rather than Markdown so the generated recipe shows as plain text.
            output_gen = gr.Textbox(label="AI Generated Recipe", lines=15)

    # The output order must match the tuple returned by recipe_wizard.
    submit_btn.click(
        fn=recipe_wizard,
        inputs=ingredient_input,
        outputs=[output_rec1, output_rec2, output_rec3, output_gen],
    )

    gr.Examples(
        examples=[
            ["chicken, broccoli, cheese"],
            ["ground beef, potatoes, onions"],
            ["flour, sugar, eggs, butter"],
        ],
        inputs=ingredient_input,
    )

demo.launch()