File size: 5,028 Bytes
d782710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
from transformers import pipeline
import time

# --- 1. DATA LOADING AND PREPROCESSING ---
print("===== Application Startup =====")
# Wall-clock start of the setup phase; the elapsed time is printed further
# down, just before the Gradio interface is built.
start_time = time.time()

# Load dataset
# The split expression requests only the first 20,000 rows of "train".
dataset = load_dataset("corbt/all-recipes", split="train[:20000]")

# Preprocessing functions
def extract_title_and_ingredients(sample):
    """Build the text that is embedded for one recipe.

    The text is everything in ``sample['input']`` that precedes the first
    occurrence of "Directions". When that marker is absent, the whole input
    is used unchanged.

    Args:
        sample: Mapping with an 'input' string holding the raw recipe text.

    Returns:
        dict with a single key, 'text_for_embedding'.
    """
    # partition() yields the full string as the head when the marker is
    # missing; slicing with find()'s -1 would silently drop the last character.
    head, _, _ = sample['input'].partition("Directions")
    return {"text_for_embedding": head}

def _section_body(section):
    """Drop the remainder of a section header (":" and line break) and trim."""
    if section.startswith(":"):
        section = section[1:]
    # strip() below takes care of a real newline; the escaped two-character
    # form (backslash + "n") has to be removed explicitly.
    if section.startswith("\\n"):
        section = section[2:]
    return section.strip()


def extract_each_feature(sample):
    """Split one raw recipe string into title, ingredients and directions.

    The expected layout of ``sample['input']`` is a title line, then an
    "Ingredients:" section, then a "Directions:" section. Lines may be
    separated by a real newline or by the escaped two-character sequence
    backslash + "n"; both are accepted.

    Args:
        sample: Mapping with an 'input' string holding the raw recipe text.

    Returns:
        dict with the keys 'title', 'ingredients' and 'directions'. A section
        whose marker is missing is returned as an empty string, and an input
        without any line break is returned whole as the title.
    """
    text = sample['input']

    # The title ends at the first line break of either form. Filtering out
    # find()'s -1 avoids the text[:-1] slice that chops the last character.
    line_break_positions = [
        pos for pos in (text.find("\n"), text.find("\\n")) if pos != -1
    ]
    title = text[:min(line_break_positions)] if line_break_positions else text

    # Ingredients live between the two markers, directions after the second.
    before_directions, _, after_directions = text.partition("Directions")
    _, _, after_ingredients = before_directions.partition("Ingredients")

    return {
        "title": title,
        "ingredients": _section_body(after_ingredients),
        "directions": _section_body(after_directions),
    }

# Apply preprocessing
# Each map() call adds the columns returned by the given function:
# 'text_for_embedding' first, then 'title', 'ingredients' and 'directions'.
dataset = dataset.map(extract_title_and_ingredients)
dataset = dataset.map(extract_each_feature)

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

# Compute embeddings
# The dataset and the embedding model are already loaded at this point, so
# the progress message names the step that is actually about to run.
print("Computing recipe embeddings...")
embeddings = embedding_model.encode(dataset['text_for_embedding'], show_progress_bar=True)
# The array is cast to float32 before it is handed to FAISS.
embeddings = np.array(embeddings, dtype=np.float32)

# Build FAISS index
# Row i of the index corresponds to row i of `dataset`, which is what lets
# search results be mapped back to recipes by position.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"Index is ready. Total vectors in index: {index.ntotal}")

# --- 3. SYNTHETIC GENERATION ---
# Text-generation pipeline used to produce the extra AI-generated recipe idea.
generator = pipeline('text-generation', model='gpt2')

def _parse_generated_recipe(prompt, generated_text):
    """Split raw generator output into a (title, ingredients) pair."""
    # The generated text is expected to start with the prompt itself; cut it
    # off so the instruction sentence does not end up in the recipe title.
    if generated_text.startswith(prompt):
        continuation = generated_text[len(prompt):]
    else:
        continuation = generated_text

    # (This is a basic cleanup, can be improved)
    parts = continuation.split("Ingredients:")
    # format_recipe() in this file treats an empty title as "no recipe", so
    # fall back to a generic title rather than hiding the generated idea.
    title = parts[0].strip() or "AI-Generated Recipe Idea"
    if len(parts) > 1:
        ingredients = parts[1].strip()
    else:
        ingredients = "Could not determine ingredients."
    return title, ingredients


def get_recommendations_and_generate(query_ingredients, k=3):
    """Find the k nearest recipes for a query and generate one new idea.

    Args:
        query_ingredients: Free-text list of ingredients typed by the user.
        k: Number of nearest neighbours to retrieve from the FAISS index.

    Returns:
        A tuple of ``k + 1`` items: ``k`` recommendation slots followed by
        the generated recipe. Each recommendation is a dict with the keys
        'title', 'ingredients' and 'directions', or ``None`` when the index
        had no neighbour for that slot. With the default ``k=3`` this is the
        4-tuple ``(rec1, rec2, rec3, generated_recipe)``.
    """
    # 1. Get Recommendations
    query_vector = embedding_model.encode([query_ingredients])
    query_vector = np.array(query_vector, dtype=np.float32)
    _distances, indices = index.search(query_vector, k)

    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)  # Convert numpy.int64 to standard Python int
        if idx < 0:
            # FAISS reports -1 when fewer than k vectors are available;
            # dataset[-1] would silently return an unrelated recipe.
            results.append(None)
            continue
        row = dataset[idx]  # fetch the row once instead of once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # 2. Generate a new recipe idea
    prompt = f"Create a short, simple recipe title and a list of ingredients using: {query_ingredients}."
    generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
    generated_title, generated_ingredients = _parse_generated_recipe(prompt, generated_text)

    generated_recipe = {
        "title": generated_title,
        "ingredients": generated_ingredients,
        "directions": "This is an AI-generated idea. Directions are not provided."
    }

    # One slot per requested neighbour, so the result length follows k
    # instead of assuming exactly three recommendations.
    return (*results, generated_recipe)

# --- 4. GRADIO USER INTERFACE ---
def format_recipe(recipe):
    """Render a recipe mapping as a Markdown snippet.

    Args:
        recipe: Mapping with 'title', 'ingredients' and 'directions' entries,
            or a falsy value when there is nothing to show.

    Returns:
        Markdown with the title as a level-3 heading followed by the
        ingredients and directions, or a "No recipe found." heading when the
        recipe is falsy or its title is empty.
    """
    if recipe and recipe['title']:
        markdown_lines = [
            f"### {recipe['title']}",
            "**Ingredients:**",
            f"{recipe['ingredients']}",
            "",
            "**Directions:**",
            f"{recipe['directions']}",
        ]
        return "\n".join(markdown_lines)
    return "### No recipe found."

def recipe_wizard(ingredients):
    """Gradio callback: turn the ingredient text into four Markdown panes.

    Args:
        ingredients: Text from the ingredients textbox.

    Returns:
        A 4-tuple of Markdown strings: three recommended recipes followed by
        the AI-generated idea. For blank input the first pane asks for
        ingredients and the other three are empty.
    """
    # Skip the embedding search and text generation for blank input: there is
    # nothing meaningful to match or to generate from.
    if not ingredients or not ingredients.strip():
        return "### Please enter at least one ingredient.", "", "", ""

    rec1, rec2, rec3, gen_rec = get_recommendations_and_generate(ingredients)
    return format_recipe(rec1), format_recipe(rec2), format_recipe(rec3), format_recipe(gen_rec)

# Report how long the setup above took, measured from start_time.
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")
    
    # Input row: free-text ingredient list and the button that submits it.
    with gr.Row():
        ingredient_input = gr.Textbox(label="Your Ingredients", placeholder="e.g., chicken, rice, tomatoes, garlic")
        submit_btn = gr.Button("Get Recipes")

    # Output row: one column for the three recommendations, one for the
    # generated idea.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Recommended Recipes")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column():
            gr.Markdown("### ✨ AI-Generated Idea")
            output_gen = gr.Markdown()

    # recipe_wizard returns four Markdown strings, one per output component,
    # in the order listed here.
    submit_btn.click(
        fn=recipe_wizard,
        inputs=ingredient_input,
        outputs=[output_rec1, output_rec2, output_rec3, output_gen]
    )
    
    # Sample ingredient lists offered for the ingredients textbox.
    gr.Examples(
        examples=[
            ["chicken, broccoli, cheese"],
            ["ground beef, potatoes, onions"],
            ["flour, sugar, eggs, butter"]
        ],
        inputs=ingredient_input
    )

# Start the app; this runs at import time because there is no __main__ guard.
demo.launch()