Spaces:
Sleeping
Sleeping
File size: 5,028 Bytes
d782710 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
from transformers import pipeline
import time
# --- 1. DATA LOADING AND PREPROCESSING ---
# Everything in this section runs at import time, before the UI is built.
print("===== Application Startup =====")
# Start the wall-clock timer first so the "loaded in ... seconds" report
# printed later in the file covers the dataset download and model loading.
start_time = time.time()
# Load dataset: only the `train[:20000]` slice of the "corbt/all-recipes"
# train split is requested, which bounds how many rows are embedded below.
dataset = load_dataset("corbt/all-recipes", split="train[:20000]")
# Preprocessing functions
def extract_title_and_ingredients(sample):
    """Build the text that is embedded for similarity search.

    Keeps everything in ``sample['input']`` that precedes the first
    "Directions" marker, i.e. the title and the ingredient list.

    Args:
        sample: A dataset row (mapping) holding the recipe text under
            the ``'input'`` key.

    Returns:
        A dict ``{"text_for_embedding": <str>}``, the shape expected by
        ``dataset.map``.
    """
    text = sample['input']
    cut = text.find("Directions")
    # str.find returns -1 when the marker is absent; slicing with -1 would
    # silently chop off the last character, so keep the whole text instead.
    extraction = text if cut == -1 else text[:cut]
    return {"text_for_embedding": extraction}
def _strip_section_header(body):
    """Drop the ':' and line break left over right after a section marker."""
    if body.startswith(":"):
        body = body[1:]
    # Tolerate text whose line breaks are stored as a literal backslash-n.
    if body.startswith("\\n"):
        body = body[2:]
    return body.strip()


def extract_each_feature(sample):
    """Split a recipe's raw text into title, ingredients and directions.

    The text is expected to look like
    ``<title>`` / ``Ingredients:`` / ... / ``Directions:`` / ... with the
    parts separated by line breaks. Line breaks may be real newlines or
    the literal two-character sequence backslash-n.

    Args:
        sample: A dataset row (mapping) holding the recipe text under
            the ``'input'`` key.

    Returns:
        A dict with the keys ``"title"``, ``"ingredients"`` and
        ``"directions"``. A section whose marker is missing is returned
        as an empty string instead of a mis-sliced fragment.
    """
    text = sample['input']

    # Title = first line. Use whichever line-break form occurs first; if
    # there is none, the whole text is the title.
    breaks = (text.find("\n"), text.find("\\n"))
    title_end = min((pos for pos in breaks if pos != -1), default=len(text))
    title = text[:title_end]

    ing_pos = text.find("Ingredients")
    after_ing = ing_pos + len("Ingredients") if ing_pos != -1 else 0
    # Look for the directions marker only after the ingredients marker so
    # the two sections cannot overlap.
    dir_pos = text.find("Directions", after_ing)

    if ing_pos == -1:
        ingredients = ""
    else:
        ing_end = dir_pos if dir_pos != -1 else len(text)
        ingredients = _strip_section_header(text[after_ing:ing_end])

    if dir_pos == -1:
        directions = ""
    else:
        directions = _strip_section_header(text[dir_pos + len("Directions"):])

    return {"title": title, "ingredients": ingredients, "directions": directions}
# Apply preprocessing: add the embedding text first, then the per-field
# columns (title / ingredients / directions) used for display.
dataset = dataset.map(extract_title_and_ingredients).map(extract_each_feature)

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

# Compute one float32 embedding per recipe.
print("Loading dataset and embedding model...")
embeddings = np.array(
    embedding_model.encode(dataset['text_for_embedding'], show_progress_bar=True),
    dtype=np.float32,
)

# Build FAISS index (flat L2) over all recipe embeddings.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"Index is ready. Total vectors in index: {index.ntotal}")

# --- 3. SYNTHETIC GENERATION ---
# Text-generation pipeline used to invent a new recipe idea per query.
generator = pipeline('text-generation', model='gpt2')
def get_recommendations_and_generate(query_ingredients, k=3):
    """Find similar recipes for a query and generate one new recipe idea.

    The query is embedded with the module-level ``embedding_model`` and
    looked up in the module-level FAISS ``index``; matching rows are read
    from ``dataset``. A second recipe "idea" is produced by the
    module-level ``generator`` pipeline.

    Args:
        query_ingredients: Free-text list of ingredients typed by the user.
        k: Number of nearest neighbours requested from the index.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_recipe)``. Each element is
        a dict with ``"title"``, ``"ingredients"`` and ``"directions"``.
        When fewer than three neighbours are available (small ``k`` or a
        small index) the missing recommendation slots are ``None``.
    """
    # 1. Get recommendations
    query_vector = np.array(embedding_model.encode([query_ingredients]), dtype=np.float32)
    _distances, indices = index.search(query_vector, k)

    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)  # numpy.int64 -> plain int for dataset indexing
        if idx < 0:
            # FAISS pads the result with -1 when it has fewer than k
            # vectors; -1 would otherwise silently select the last row.
            continue
        row = dataset[idx]  # fetch the row once instead of once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # 2. Generate a new recipe idea
    prompt = f"Create a short, simple recipe title and a list of ingredients using: {query_ingredients}."
    # NOTE(review): max_length presumably counts the prompt tokens as well,
    # so a very long query leaves little or no room for output - confirm.
    generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']

    # The pipeline output starts with the prompt itself; drop that echo so
    # the title only contains newly generated text.
    if generated_text.startswith(prompt):
        completion = generated_text[len(prompt):]
    else:
        completion = generated_text

    # Basic cleanup: text before "Ingredients:" is treated as the title,
    # the segment right after it as the ingredient list.
    generated_recipe_parts = completion.split("Ingredients:")
    # Fall back to a fixed title so an empty one does not make the whole
    # idea render as "No recipe found".
    generated_title = generated_recipe_parts[0].strip() or "AI-Generated Recipe Idea"
    if len(generated_recipe_parts) > 1:
        generated_ingredients = generated_recipe_parts[1].strip()
    else:
        generated_ingredients = "Could not determine ingredients."

    generated_recipe = {
        "title": generated_title,
        "ingredients": generated_ingredients,
        "directions": "This is an AI-generated idea. Directions are not provided.",
    }

    # Always hand back exactly three recommendation slots, padding with
    # None, so callers can unpack the result regardless of k.
    top_three = (results + [None] * 3)[:3]
    return top_three[0], top_three[1], top_three[2], generated_recipe
# --- 4. GRADIO USER INTERFACE ---
def format_recipe(recipe):
    """Render a recipe dict as a Markdown snippet.

    Args:
        recipe: A dict with ``'title'``, ``'ingredients'`` and
            ``'directions'``, or a falsy value (``None`` / empty dict)
            when there is nothing to show.

    Returns:
        A Markdown string. ``"### No recipe found."`` is returned when the
        recipe is missing or has no title; missing ingredients or
        directions are rendered as empty sections instead of raising
        ``KeyError``.
    """
    if not recipe:
        return "### No recipe found."
    title = recipe.get('title')
    if not title:
        return "### No recipe found."
    ingredients = recipe.get('ingredients', "")
    directions = recipe.get('directions', "")
    return f"### {title}\n**Ingredients:**\n{ingredients}\n\n**Directions:**\n{directions}"
def recipe_wizard(ingredients):
    """Gradio callback: turn the ingredient text into four Markdown outputs.

    Args:
        ingredients: Text entered in the "Your Ingredients" box.

    Returns:
        A 4-tuple of Markdown strings: three recommended recipes followed
        by the AI-generated idea. For blank input a prompt to enter
        ingredients is returned in the first slot and the others are empty.
    """
    # Skip the embedding search and text generation entirely when there is
    # nothing to search for.
    if not ingredients or not ingredients.strip():
        return "### Please enter at least one ingredient.", "", "", ""

    rec1, rec2, rec3, gen_rec = get_recommendations_and_generate(ingredients)
    return (
        format_recipe(rec1),
        format_recipe(rec2),
        format_recipe(rec3),
        format_recipe(gen_rec),
    )
# Report how long the startup work (data, embeddings, index, generator) took.
end_time = time.time()
elapsed_seconds = end_time - start_time
print(f"Models and data loaded in {elapsed_seconds:.2f} seconds.")
# Gradio Interface
# Layout: a header, an input row, then two columns - recommended recipes on
# the left and the AI-generated idea on the right.
# NOTE(review): the nesting below is reconstructed; in particular, whether
# the button sits inside the input row is an assumption - confirm.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")

    with gr.Row():
        ingredient_input = gr.Textbox(label="Your Ingredients", placeholder="e.g., chicken, rice, tomatoes, garlic")
        submit_btn = gr.Button("Get Recipes")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Recommended Recipes")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column():
            gr.Markdown("### ✨ AI-Generated Idea")
            output_gen = gr.Markdown()

    # The four outputs line up with the 4-tuple returned by recipe_wizard.
    submit_btn.click(
        fn=recipe_wizard,
        inputs=ingredient_input,
        outputs=[output_rec1, output_rec2, output_rec3, output_gen]
    )

    # Clickable sample queries that fill the ingredient box.
    gr.Examples(
        examples=[
            ["chicken, broccoli, cheese"],
            ["ground beef, potatoes, onions"],
            ["flour, sugar, eggs, butter"]
        ],
        inputs=ingredient_input
    )

demo.launch()