TheHuriShow committed on
Commit
68d4109
·
verified ·
1 Parent(s): dd88dab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -112
app.py CHANGED
@@ -1,136 +1,131 @@
1
- # app.py
2
-
3
  import gradio as gr
 
 
4
  import faiss
5
  import numpy as np
6
- from datasets import load_dataset
7
- from sentence_transformers import SentenceTransformer, util
8
  from transformers import pipeline
9
  import time
10
 
11
- # --- 1. LOAD MODELS AND DATA (GLOBAL SCOPE) ---
12
- # This section runs only once when the app starts.
13
-
14
- print("Loading dataset and embedding model...")
15
  start_time = time.time()
16
 
17
- # Load the dataset
18
  dataset = load_dataset("corbt/all-recipes", split="train[:20000]")
19
 
20
- # Extract title and ingredients for embedding
21
  def extract_title_and_ingredients(sample):
22
- """
23
- Extract the title and ingredients of a recipe from a sample.
24
- """
25
  extraction = sample['input'][:sample['input'].find("Directions")]
26
- return {
27
- "text_for_embedding": extraction
28
- }
29
 
30
- dataset = dataset.map(extract_title_and_ingredients)
31
-
32
-
33
- # Extract all features
34
  def extract_each_feature(sample):
35
- """
36
- Extract each feature of a recipe from a sample.
37
- """
38
- title = sample['input'][:sample['input'].find("\n")]
39
- ingredients = sample['input'][sample['input'].find("Ingredients")+len("Ingredients:\n"):sample['input'].find("Directions")].strip()
40
- directions = sample['input'][sample['input'].find("Directions")+len("Directions:\n"):].strip()
41
- return {
42
- "title": title,
43
- "ingredients": ingredients,
44
- "directions": directions,
45
- }
46
 
 
 
47
  dataset = dataset.map(extract_each_feature)
48
 
49
- # Load the embedding model
50
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
51
 
52
- # Load the pre-built FAISS index
53
- try:
54
- index = faiss.read_index("recipe_index.faiss")
55
- except Exception as e:
56
- print(f"Could not load FAISS index. Error: {e}. Please ensure 'recipe_index.faiss' is in the same directory.")
57
- # Handle error gracefully, maybe by disabling the search feature
58
- index = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # Load the text generation model
61
- generator = pipeline('text-generation', model='distilgpt2')
 
 
 
 
 
 
 
62
 
63
  end_time = time.time()
64
  print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")
65
 
66
-
67
- # --- 2. DEFINE THE CORE LOGIC FUNCTION ---
68
- def find_and_generate(user_ingredients):
69
- """
70
- This function takes user ingredients, finds similar recipes,
71
- and generates a new one.
72
- """
73
- if not user_ingredients:
74
- return "<p>Please enter some ingredients.</p>", "<p></p>"
75
-
76
- # --- Recommendation Logic ---
77
- if index:
78
- query_vector = embedding_model.encode([user_ingredients])
79
- # We search for 3 results.
80
- distances, indices = index.search(np.array(query_vector, dtype=np.float32), 3)
81
-
82
- recs_html = "<h2>Top 3 Similar Recipes:</h2>"
83
- for i, idx in enumerate(indices[0]):
84
- title = dataset[int(idx)].get('title', 'No Title')
85
- ingredients_list = dataset[int(idx)].get('ingredients', [])
86
- # Use a simple join for cleaner HTML
87
- ingredients = ", ".join(ingredients_list)
88
-
89
- recs_html += f"<h3>{i+1}. {title}</h3>"
90
- recs_html += f"<p><b>Ingredients:</b> {ingredients}</p><hr>"
91
- else:
92
- recs_html = "<h2>Recommendation engine not available.</h2> <p>Could not load the FAISS index file.</p>"
93
-
94
-
95
- # --- Generation Logic ---
96
- prompt = f"A creative and delicious recipe that features {user_ingredients}.\n\nRecipe Title:"
97
- # Let's limit the new tokens to a reasonable amount to prevent overly long responses
98
- generated_outputs = generator(prompt, max_new_tokens=100, num_return_sequences=1)
99
- generated_text = generated_outputs[0]['generated_text']
100
 
101
- # --- FIX: Clean up the generated text ---
102
- # We remove the original prompt from the start of the generated text.
103
- # This prevents sending unnecessary data and fixes the error.
104
- if prompt in generated_text:
105
- cleaned_generated_text = generated_text[len(prompt):].strip()
106
- else:
107
- cleaned_generated_text = generated_text.strip()
108
-
109
- gen_html = "<h2>AI-Generated Idea:</h2>"
110
- # Also, replace newline characters with HTML line breaks for better formatting
111
- gen_html += cleaned_generated_text.replace("\n", "<br>")
112
-
113
- return recs_html, gen_html
114
- # --- 3. CREATE AND LAUNCH THE GRADIO INTERFACE ---
115
-
116
- examples = [
117
- ["chicken, potatoes, carrots, onions"],
118
- ["beef, soy sauce, ginger, rice"],
119
- ["tomatoes, basil, mozzarella, olive oil"],
120
- ]
121
-
122
- demo = gr.Interface(
123
- fn=find_and_generate,
124
- inputs=gr.Textbox(lines=3, label="Enter Your Ingredients (comma-separated)"),
125
- outputs=[
126
- gr.HTML(label="Similar Recipes"),
127
- gr.HTML(label="AI Generated Recipe")
128
- ],
129
- title="🍳 Recipe Genius",
130
- description="Your personal AI chef! Enter the ingredients you have, and get 3 real recipe recommendations plus 1 new AI-generated idea.",
131
- examples=examples,
132
- theme=gr.themes.Soft()
133
- )
134
-
135
- # Launch the app!
136
- demo.launch(share=True)
 
 
 
1
  import gradio as gr
2
+ from datasets import load_dataset
3
+ from sentence_transformers import SentenceTransformer
4
  import faiss
5
  import numpy as np
6
+ import os
 
7
  from transformers import pipeline
8
  import time
9
 
10
+ # --- 1. DATA LOADING AND PREPROCESSING ---
11
+ print("===== Application Startup =====")
 
 
12
  start_time = time.time()
13
 
14
+ # Load dataset
15
  dataset = load_dataset("corbt/all-recipes", split="train[:20000]")
16
 
17
# Preprocessing functions
def extract_title_and_ingredients(sample):
    """Build the text that gets embedded for similarity search.

    Keeps everything in ``sample['input']`` that precedes the first
    "Directions" marker, i.e. the title and the ingredient list.

    Args:
        sample: A dataset row with a string ``'input'`` field.

    Returns:
        A dict with the single key ``"text_for_embedding"``.
    """
    # str.partition yields the whole string as the head when the marker is
    # absent. Slicing with find() would use -1 in that case and silently
    # drop the last character of the record.
    extraction, _, _ = sample['input'].partition("Directions")
    return {"text_for_embedding": extraction}
 
 
21
 
 
 
 
 
def extract_each_feature(sample):
    """Split a raw recipe string into title, ingredients and directions.

    The text in ``sample['input']`` is read as: a title on the first line,
    an "Ingredients" section, then a "Directions" section.

    Args:
        sample: A dataset row with a string ``'input'`` field.

    Returns:
        A dict with the keys ``"title"``, ``"ingredients"`` and
        ``"directions"``. A section whose marker is missing is returned as
        an empty string.
    """
    text = sample['input']

    # The separator is a real newline. Searching for the two-character
    # sequence backslash + "n" never matches, which turned the title into
    # (almost) the whole record and shifted the section offsets by one.
    title = text.partition("\n")[0].strip()

    head, has_directions, after_directions = text.partition("Directions")
    _, has_ingredients, after_ingredients = head.partition("Ingredients")

    # Each marker is followed by ":" and a newline; drop the colon and let
    # strip() remove the surrounding whitespace.
    ingredients = after_ingredients.lstrip(":").strip() if has_ingredients else ""
    directions = after_directions.lstrip(":").strip() if has_directions else ""

    return {"title": title, "ingredients": ingredients, "directions": directions}
 
 
 
 
 
 
 
27
 
28
# Apply preprocessing
# First pass adds the "text_for_embedding" column, second pass adds the
# "title" / "ingredients" / "directions" columns.
dataset = dataset.map(extract_title_and_ingredients)
dataset = dataset.map(extract_each_feature)

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

# Compute embeddings
# The dataset and the model are already loaded at this point, so the log line
# names the step that actually runs here (encoding every recipe).
print("Computing recipe embeddings...")
embeddings = embedding_model.encode(dataset['text_for_embedding'], show_progress_bar=True)
# Cast to float32 before handing the matrix to FAISS.
embeddings = np.array(embeddings, dtype=np.float32)

# Build FAISS index
# The index is rebuilt in memory on every start-up; nothing is read from disk.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"Index is ready. Total vectors in index: {index.ntotal}")

# --- 3. SYNTHETIC GENERATION ---
generator = pipeline('text-generation', model='gpt2')
49
+
50
def get_recommendations_and_generate(query_ingredients, k=3):
    """Recommend existing recipes and generate one new recipe idea.

    Args:
        query_ingredients: Free-text ingredient list typed by the user.
        k: Number of nearest neighbours requested from the FAISS index.
            Only the first three hits are returned.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated)``. Each element is a dict
        with the keys ``"title"``, ``"ingredients"`` and ``"directions"``,
        or ``None`` when nothing is available for that slot (blank query,
        or fewer than three hits).
    """
    slots = 3  # callers unpack exactly three recommendations

    # A blank query has nothing to embed and nothing to prompt the generator
    # with, so skip both models entirely.
    if not query_ingredients or not query_ingredients.strip():
        return (None,) * (slots + 1)

    # 1. Get Recommendations
    query_vector = np.array(embedding_model.encode([query_ingredients]), dtype=np.float32)
    _distances, indices = index.search(query_vector, k)

    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)  # numpy.int64 -> plain int for dataset indexing
        if idx < 0:
            # FAISS fills missing neighbours with -1; as a Python index that
            # would silently select the last row of the dataset.
            continue
        row = dataset[idx]  # fetch the row once instead of once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # Always hand back exactly three recommendation slots, padding with None
    # so that a small k or a short index cannot raise IndexError.
    results = (results + [None] * slots)[:slots]

    # 2. Generate a new recipe idea
    prompt = f"Create a short, simple recipe title and a list of ingredients using: {query_ingredients}."
    # max_new_tokens bounds only the continuation. max_length also counts the
    # prompt tokens, so a long ingredient list could leave no room for output.
    generated_text = generator(prompt, max_new_tokens=100, num_return_sequences=1)[0]['generated_text']

    # Drop the echoed prompt when the output starts with it.
    if generated_text.startswith(prompt):
        generated_text = generated_text[len(prompt):]

    # Everything before "Ingredients:" is taken as the title, everything
    # after it as the ingredient list.
    title_part, marker, ingredients_part = generated_text.partition("Ingredients:")
    # Fall back to a placeholder title so a title-less generation is still
    # shown instead of being treated as "no recipe".
    generated_title = title_part.strip() or "AI-Generated Recipe Idea"
    generated_ingredients = ingredients_part.strip() if marker else "Could not determine ingredients."

    generated_recipe = {
        "title": generated_title,
        "ingredients": generated_ingredients,
        "directions": "This is an AI-generated idea. Directions are not provided.",
    }

    return results[0], results[1], results[2], generated_recipe
83
 
84
+ # --- 4. GRADIO USER INTERFACE ---
85
def format_recipe(recipe):
    """Render a recipe dict as a Markdown snippet.

    Args:
        recipe: A dict with ``'title'``, ``'ingredients'`` and
            ``'directions'`` entries, or a falsy value when there is no
            recipe to show.

    Returns:
        A Markdown string. ``"### No recipe found."`` is returned when
        ``recipe`` is falsy or has no (non-empty) title.
    """
    # .get() keeps partially filled dicts from raising KeyError.
    if not recipe or not recipe.get('title'):
        return "### No recipe found."
    ingredients = recipe.get('ingredients', '')
    directions = recipe.get('directions', '')
    return f"### {recipe['title']}\n**Ingredients:**\n{ingredients}\n\n**Directions:**\n{directions}"
89
+
90
def recipe_wizard(ingredients):
    """Gradio callback: look up and generate recipes, then render them.

    Args:
        ingredients: The text entered in the ingredients box.

    Returns:
        A 4-tuple of Markdown strings: three recommended recipes followed
        by the AI-generated idea.
    """
    first, second, third, generated = get_recommendations_and_generate(ingredients)
    # Render the four recipes in the same order as the output components.
    return tuple(map(format_recipe, (first, second, third, generated)))
93
 
94
  end_time = time.time()
95
  print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")
96
 
97
+ # Gradio Interface
98
# Example queries offered below the input box.
example_queries = [
    ["chicken, broccoli, cheese"],
    ["ground beef, potatoes, onions"],
    ["flour, sugar, eggs, butter"],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")

    # Input row: ingredient box and the button that triggers the search.
    with gr.Row():
        ingredient_input = gr.Textbox(
            label="Your Ingredients",
            placeholder="e.g., chicken, rice, tomatoes, garlic",
        )
        submit_btn = gr.Button("Get Recipes")

    # Output row: recommendations in one column, the generated idea in the other.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Recommended Recipes")
            output_rec1, output_rec2, output_rec3 = (gr.Markdown() for _ in range(3))
        with gr.Column():
            gr.Markdown("### AI-Generated Idea")
            output_gen = gr.Markdown()

    # The callback returns four Markdown strings, one per output component.
    recipe_outputs = [output_rec1, output_rec2, output_rec3, output_gen]
    submit_btn.click(fn=recipe_wizard, inputs=ingredient_input, outputs=recipe_outputs)

    gr.Examples(examples=example_queries, inputs=ingredient_input)

demo.launch()