File size: 6,129 Bytes
37d9971
68d4109
be37158
83e7fb5
 
 
 
 
69a6292
68d4109
83e7fb5
 
07e3484
83e7fb5
 
07e3484
83e7fb5
07e3484
 
f1f59f6
69a6292
07e3484
f842f30
 
69a6292
 
 
 
 
 
07e3484
69a6292
 
 
 
 
 
 
 
 
 
 
 
 
07e3484
 
 
 
 
 
68d4109
83e7fb5
 
be37158
07e3484
68d4109
 
83e7fb5
69a6292
68d4109
69a6292
 
68d4109
 
be37158
07e3484
e4702ce
68d4109
 
be37158
2e29b2f
68d4109
 
 
 
07e3484
 
68d4109
 
 
 
 
 
 
babdea3
 
2e29b2f
f1f59f6
 
 
 
 
be37158
 
 
68d4109
be37158
 
 
 
 
 
 
 
 
 
 
83e7fb5
68d4109
 
be37158
68d4109
 
 
 
be37158
f1f59f6
be37158
 
68d4109
be37158
 
83e7fb5
 
 
 
68d4109
 
 
 
 
 
 
 
 
 
07e3484
68d4109
 
 
 
07e3484
68d4109
f1f59f6
68d4109
 
 
 
 
 
83e7fb5
68d4109
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
import faiss
import numpy as np
from transformers import pipeline
import time

# --- 1. DATA LOADING AND PREPROCESSING ---
print("===== Application Startup =====")
# Wall-clock reference point; the elapsed load time is printed later, right
# before the Gradio interface is built.
start_time = time.time()

# Load dataset and limit to the first 20,000 rows
# (the "train[:20000]" split slice does the limiting).
dataset = load_dataset("corbt/all-recipes", split="train[:20000]")

# Preprocessing functions to extract features from the raw text
def extract_each_feature(sample):
    """Split one raw recipe record into title, ingredients and directions.

    Args:
        sample: Mapping whose ``'input'`` value is the raw recipe text. The
            title is taken to be the first line, and the sections are
            located via the literal markers ``"Ingredients:"`` and
            ``"Directions:"``.

    Returns:
        A dict with the keys ``"title"``, ``"ingredients"`` and
        ``"directions"``. ``"ingredients"`` is only extracted when both
        markers are present; any section that cannot be located is set to
        ``"Not available"``.
    """
    ingredients_marker = "Ingredients:"
    directions_marker = "Directions:"

    full_text = sample['input']

    # partition() keeps the whole text when there is no newline at all.
    # Slicing with find()'s -1 result would silently drop the last character.
    title = full_text.partition("\n")[0]

    ingredients = "Not available"
    directions = "Not available"

    ingredients_start_index = full_text.find(ingredients_marker)
    directions_start_index = full_text.find(directions_marker)

    if ingredients_start_index != -1 and directions_start_index != -1:
        # Ingredients are whatever sits between the two markers.
        ingredients_begin = ingredients_start_index + len(ingredients_marker)
        ingredients = full_text[ingredients_begin:directions_start_index].strip()

    if directions_start_index != -1:
        directions_begin = directions_start_index + len(directions_marker)
        directions_raw = full_text[directions_begin:].strip()

        # Some records contain a second recipe appended after the first one;
        # stop at the earliest repeated marker so it does not leak into the
        # directions.
        cut_off_indices = [
            position
            for position in (
                directions_raw.find(ingredients_marker),
                directions_raw.find(directions_marker),
            )
            if position != -1
        ]
        if cut_off_indices:
            directions = directions_raw[:min(cut_off_indices)].strip()
        else:
            directions = directions_raw

    return {
        "title": title,
        "ingredients": ingredients,
        "directions": directions,
    }

# Apply preprocessing
# extract_each_feature returns "title", "ingredients" and "directions" for
# every row; those keys are read back later when building recommendations.
dataset = dataset.map(extract_each_feature)

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
print("Loading embedding model...")
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

# The index is read from a prebuilt file; this script never builds or
# updates it.
# NOTE(review): ids returned by index.search() are used directly as row
# numbers into `dataset`, so the file presumably was built from the same
# 20,000-row slice with the same embedding model - confirm.
index_file = "recipe_index.faiss"

print(f"Loading FAISS index from {index_file}...")
index = faiss.read_index(index_file)
print(f"Index is ready. Total vectors in index: {index.ntotal}")

# --- 3. SYNTHETIC GENERATION (IMPROVED) ---
print("Loading generative model...")
# Text-generation pipeline backed by GPT-2; used to draft candidate recipes.
generator = pipeline('text-generation', model='gpt2')

def get_recommendations_and_generate(query_ingredients, k=3):
    """Retrieve similar recipes from the FAISS index and generate a new one.

    Args:
        query_ingredients: Free-text ingredient list typed by the user.
        k: Number of nearest neighbours requested from the index.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_text)``. Each ``rec`` is a
        dict with the keys ``"title"``, ``"ingredients"`` and
        ``"directions"``; slots for which the index produced no usable hit
        hold a ``"No recipe found"`` placeholder. ``generated_text`` is the
        generated candidate whose embedding has the highest cosine
        similarity to the query embedding.
    """
    # 1. Get Recommendations
    query_vector = embedding_model.encode([query_ingredients])
    query_vector = np.array(query_vector, dtype=np.float32)
    _, indices = index.search(query_vector, k)

    dataset_size = len(dataset)
    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)
        # FAISS fills missing neighbours with -1, which would silently pick
        # the LAST dataset row; ids past the end would raise IndexError.
        # Skip both so the placeholder padding below takes over instead.
        if not 0 <= idx < dataset_size:
            continue
        row = dataset[idx]  # fetch the row once instead of once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # The caller always unpacks exactly three recommendations.
    while len(results) < 3:
        results.append({"title": "No recipe found", "ingredients": "", "directions": ""})

    # 2. Generate 10 new recipe ideas with a simpler, more direct prompt
    prompt = f"Write a complete recipe that includes a title, a list of ingredients, and step-by-step directions. The recipe must use the following ingredients: {query_ingredients}."

    # Optimized for speed by reducing max_new_tokens.
    # Sampling is requested explicitly so the ten returned sequences can
    # differ from one another.
    generated_outputs = generator(
        prompt,
        max_new_tokens=180,
        num_return_sequences=10,
        do_sample=True,
        pad_token_id=50256,
    )

    # 3. Find the best recipe out of the 10 generated
    # The pipeline echoes the prompt in its output; remove it.
    generated_texts = [output['generated_text'].replace(prompt, "").strip() for output in generated_outputs]

    # Embed all 10 generated texts
    generated_embeddings = embedding_model.encode(generated_texts)

    # Calculate cosine similarity between the user's query and each generated text
    similarities = util.cos_sim(query_vector, generated_embeddings)

    # Find the index of the most similar generated recipe; int() turns the
    # argmax result into a plain Python list index.
    best_recipe_index = int(similarities.argmax())
    best_generated_recipe = generated_texts[best_recipe_index]

    return results[0], results[1], results[2], best_generated_recipe

# --- 4. GRADIO USER INTERFACE ---
def format_recipe(recipe):
    """Render a recommended recipe as a Markdown snippet.

    Args:
        recipe: Dict with "title", "ingredients" and "directions" entries,
            or a falsy value when there is nothing to show.

    Returns:
        A Markdown string with the title as a level-3 heading followed by
        the ingredients and directions, or a "no recipe" heading when the
        recipe is falsy or its title is empty.
    """
    has_title = bool(recipe) and bool(recipe['title'])
    if has_title:
        return f"### {recipe['title']}\n**Ingredients:**\n{recipe['ingredients']}\n\n**Directions:**\n{recipe['directions']}"
    return "### No recipe found."

def format_generated_recipe(recipe_text):
    """Return the AI-generated recipe exactly as received.

    The generated text is shown as plain text, so no Markdown decoration is
    applied here.
    """
    plain_text = recipe_text
    return plain_text

def recipe_wizard(ingredients):
    """Run the full pipeline for one query and format every output.

    Args:
        ingredients: The ingredient text entered by the user.

    Returns:
        A 4-tuple: the three formatted recommended recipes followed by the
        formatted AI-generated recipe.
    """
    first, second, third, generated_text = get_recommendations_and_generate(ingredients)
    recommended_markdown = tuple(
        format_recipe(candidate) for candidate in (first, second, third)
    )
    return recommended_markdown + (format_generated_recipe(generated_text),)

# Report how long the module-level loading above took.
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface
# Layout: a header, an input row (textbox + button), then an output row with
# the three recommendations on the left and the generated recipe on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")

    with gr.Row():
        ingredient_input = gr.Textbox(label="Your Ingredients", placeholder="e.g., chicken, rice, tomatoes, garlic")
        submit_btn = gr.Button("Get Recipes")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Recommended Recipes")
            # One Markdown slot per recommendation returned by recipe_wizard.
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("### ✨ AI-Generated Idea")
            output_gen = gr.Textbox(label="AI Generated Recipe", lines=15) # Textbox (not Markdown) so the generated text is shown as plain text

    # The order of `outputs` must match the 4-tuple returned by recipe_wizard.
    submit_btn.click(
        fn=recipe_wizard,
        inputs=ingredient_input,
        outputs=[output_rec1, output_rec2, output_rec3, output_gen]
    )

    # Sample queries offered for the ingredient textbox.
    gr.Examples(
        examples=[
            ["chicken, broccoli, cheese"],
            ["ground beef, potatoes, onions"],
            ["flour, sugar, eggs, butter"]
        ],
        inputs=ingredient_input
    )

# Start the web app as soon as the script has finished setting up.
demo.launch()