Nofarsagi123 committed on
Commit
4d68da2
·
verified ·
1 Parent(s): 823edb4

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app (4).py +161 -0
  3. recipe_index (2).faiss +3 -0
  4. requirements (4).txt +8 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  recipe_index.faiss filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  recipe_index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ recipe_index[[:space:]](2).faiss filter=lfs diff=lfs merge=lfs -text
app (4).py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import faiss
5
+ import numpy as np
6
+ from transformers import pipeline
7
+ import time
8
+
# --- 1. DATA LOADING AND PREPROCESSING ---
print("===== Application Startup =====")
# Wall-clock start, used later to report how long start-up took.
start_time = time.time()

# Load dataset and limit to the first 20,000 rows
dataset = load_dataset("corbt/all-recipes", split="train[:20000]")
15
+
16
+ # Preprocessing functions to extract features from the raw text
def extract_each_feature(sample):
    """Split one raw recipe record into title, ingredients and directions.

    Args:
        sample: Mapping with an ``'input'`` key holding the raw recipe text.
            The first line is taken as the title; the ``"Ingredients:"`` and
            ``"Directions:"`` markers delimit the other two sections.

    Returns:
        Dict with ``"title"``, ``"ingredients"`` and ``"directions"`` strings.
        ``"ingredients"`` is ``"Not available"`` unless both markers are
        present; ``"directions"`` is ``"Not available"`` when its marker is
        missing.
    """
    ingredients_marker = "Ingredients:"
    directions_marker = "Directions:"

    full_text = sample['input']

    # partition() keeps the whole text when there is no newline; slicing with
    # find()'s -1 result would silently drop the title's last character.
    title = full_text.partition("\n")[0]

    ingredients = "Not available"
    directions = "Not available"

    ingredients_start_index = full_text.find(ingredients_marker)
    directions_start_index = full_text.find(directions_marker)

    if ingredients_start_index != -1 and directions_start_index != -1:
        ingredients = full_text[
            ingredients_start_index + len(ingredients_marker):directions_start_index
        ].strip()

    if directions_start_index != -1:
        directions_raw = full_text[
            directions_start_index + len(directions_marker):
        ].strip()

        # A repeated marker means another section follows; stop the
        # directions at the earliest one.
        cut_off_indices = [
            idx
            for idx in (
                directions_raw.find(ingredients_marker),
                directions_raw.find(directions_marker),
            )
            if idx != -1
        ]
        if cut_off_indices:
            directions = directions_raw[:min(cut_off_indices)].strip()
        else:
            directions = directions_raw

    return {
        "title": title,
        "ingredients": ingredients,
        "directions": directions,
    }
50
+
# Apply preprocessing
dataset = dataset.map(extract_each_feature)

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
print("Loading embedding model...")
model_name = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

index_file = "recipe_index.faiss"

print(f"Loading FAISS index from {index_file}...")
index = faiss.read_index(index_file)
print(f"Index is ready. Total vectors in index: {index.ntotal}")

# Search results are used as row numbers into `dataset`, so a size mismatch
# means recommendations may point at the wrong (or a missing) recipe.
if index.ntotal != len(dataset):
    print(
        f"WARNING: index holds {index.ntotal} vectors but the dataset has "
        f"{len(dataset)} rows; recommendations may be misaligned."
    )

# --- 3. SYNTHETIC GENERATION (IMPROVED) ---
print("Loading generative model...")
generator = pipeline('text-generation', model='gpt2')
68
+
def get_recommendations_and_generate(query_ingredients, k=3):
    """Retrieve similar recipes and pick the best of several generated ones.

    Args:
        query_ingredients: Free-text ingredient list typed by the user.
        k: Number of nearest neighbours requested from the FAISS index.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_text)``. The first three are
        dicts with ``"title"``, ``"ingredients"`` and ``"directions"`` keys,
        padded with "No recipe found" placeholders when fewer than three
        usable neighbours come back. The last is the generated text whose
        embedding is most similar to the query embedding.
    """
    # 1. Get Recommendations
    query_vector = embedding_model.encode([query_ingredients])
    query_vector = np.array(query_vector, dtype=np.float32)
    _, indices = index.search(query_vector, k)

    results = []
    for idx_numpy in indices[0]:
        idx = int(idx_numpy)
        # FAISS reports a missing neighbour as -1; used as a row number it
        # would silently return the last dataset row. Also skip labels past
        # the end of the loaded dataset instead of raising IndexError.
        if not 0 <= idx < len(dataset):
            continue
        row = dataset[idx]  # fetch the row once rather than once per field
        results.append({
            "title": row['title'],
            "ingredients": row['ingredients'],
            "directions": row['directions'],
        })

    # The caller always unpacks exactly three recommendations.
    while len(results) < 3:
        results.append({"title": "No recipe found", "ingredients": "", "directions": ""})

    # 2. Generate 10 new recipe ideas with a simpler, more direct prompt
    prompt = f"Write a complete recipe that includes a title, a list of ingredients, and step-by-step directions. The recipe must use the following ingredients: {query_ingredients}."

    # Optimized for speed by reducing max_new_tokens
    generated_outputs = generator(prompt, max_new_tokens=180, num_return_sequences=10, pad_token_id=50256)

    # 3. Find the best recipe out of the 10 generated
    # The pipeline output echoes the prompt; remove it so only new text remains.
    generated_texts = [output['generated_text'].replace(prompt, "").strip() for output in generated_outputs]

    # Embed all 10 generated texts
    generated_embeddings = embedding_model.encode(generated_texts)

    # Calculate cosine similarity between the user's query and each generated text
    similarities = util.cos_sim(query_vector, generated_embeddings)

    # Find the index of the most similar generated recipe (plain int for list indexing)
    best_recipe_index = int(np.argmax(similarities))
    best_generated_recipe = generated_texts[best_recipe_index]

    return results[0], results[1], results[2], best_generated_recipe
108
+
109
+ # --- 4. GRADIO USER INTERFACE ---
def format_recipe(recipe):
    """Render a recommended recipe as a Markdown snippet.

    Args:
        recipe: Dict with ``'title'``, ``'ingredients'`` and ``'directions'``
            keys, or a falsy value when there is nothing to show.

    Returns:
        ``"### No recipe found."`` when ``recipe`` is falsy or has no
        (non-empty) title; otherwise a heading with the title followed by
        the ingredients and directions sections.
    """
    # .get() keeps a partially filled dict from raising KeyError; a missing
    # section is rendered as empty text.
    if not recipe or not recipe.get('title'):
        return "### No recipe found."
    return (
        f"### {recipe['title']}\n"
        f"**Ingredients:**\n{recipe.get('ingredients', '')}\n\n"
        f"**Directions:**\n{recipe.get('directions', '')}"
    )
115
+
def format_generated_recipe(recipe_text):
    """Return the AI-generated recipe unchanged.

    The generated recipe is shown as plain text, so no Markdown is added.
    """
    return recipe_text
119
+
def recipe_wizard(ingredients):
    """Gradio callback: turn the ingredient text into four display strings.

    Args:
        ingredients: Text entered in the ingredients box.

    Returns:
        A 4-tuple of strings: three formatted recommended recipes followed by
        the formatted AI-generated recipe.
    """
    # Skip retrieval and the 10-sequence generation when there is no query.
    if not ingredients or not ingredients.strip():
        nothing = format_recipe(None)
        return nothing, nothing, nothing, "Please enter at least one ingredient."

    rec1, rec2, rec3, gen_rec_text = get_recommendations_and_generate(ingredients)
    return (
        format_recipe(rec1),
        format_recipe(rec2),
        format_recipe(rec3),
        format_generated_recipe(gen_rec_text),
    )
123
+
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface
# NOTE(review): the nesting of the `with` blocks below is reconstructed; the
# original indentation was not available — confirm the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🍳 RecipeWizard AI")
    gr.Markdown("Enter the ingredients you have, and get recipe recommendations plus a new AI-generated idea!")

    with gr.Row():
        ingredient_input = gr.Textbox(label="Your Ingredients", placeholder="e.g., chicken, rice, tomatoes, garlic")
        submit_btn = gr.Button("Get Recipes")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Recommended Recipes")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("### ✨ AI-Generated Idea")
            output_gen = gr.Textbox(label="AI Generated Recipe", lines=15)  # Changed to Textbox for plain text

    # One click fills the three recommendation panes and the generated-recipe box.
    submit_btn.click(
        fn=recipe_wizard,
        inputs=ingredient_input,
        outputs=[output_rec1, output_rec2, output_rec3, output_gen],
    )

    gr.Examples(
        examples=[
            ["chicken, broccoli, cheese"],
            ["ground beef, potatoes, onions"],
            ["flour, sugar, eggs, butter"],
        ],
        inputs=ingredient_input,
    )

demo.launch()
recipe_index (2).faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b9a5c8797e28a0fe4130d9af7ccdb897d0849110ea43765aee3b7b670b14ef
3
+ size 30720045
requirements (4).txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch==2.1.0
2
+ faiss-cpu==1.7.4
3
+ gradio==4.8.0
4
+ pyarrow==14.0.1
5
+ datasets==2.15.0
6
+ transformers==4.35.2
7
+ sentence-transformers==2.3.1
8
+ huggingface-hub==0.19.4