garvitcpp committed on
Commit
c30b4ba
·
verified ·
1 Parent(s): 9ba18c7

Upload 48 files

Browse files
Files changed (49) hide show
  1. .gitattributes +1 -0
  2. app/__init__.py +18 -0
  3. app/__pycache__/__init__.cpython-312.pyc +0 -0
  4. app/api/__pycache__/routes.cpython-312.pyc +0 -0
  5. app/api/routes.py +151 -0
  6. app/main.py +3 -0
  7. app/models/__pycache__/recipe.cpython-312.pyc +0 -0
  8. app/models/recipe.py +20 -0
  9. app/services/__pycache__/extraction.cpython-312.pyc +0 -0
  10. app/services/__pycache__/image_query.cpython-312.pyc +0 -0
  11. app/services/__pycache__/image_search.cpython-312.pyc +0 -0
  12. app/services/__pycache__/recommendation.cpython-312.pyc +0 -0
  13. app/services/extraction.py +701 -0
  14. app/services/image_query.py +50 -0
  15. app/services/image_search.py +126 -0
  16. app/services/recommendation.py +28 -0
  17. app/utils/__pycache__/data_loading.cpython-312.pyc +0 -0
  18. app/utils/__pycache__/data_preprocessing.cpython-312.pyc +0 -0
  19. app/utils/__pycache__/feature_engineering.cpython-312.pyc +0 -0
  20. app/utils/__pycache__/recommendation_utils.cpython-312.pyc +0 -0
  21. app/utils/__pycache__/similarity_calculation.cpython-312.pyc +0 -0
  22. app/utils/data_loading.py +47 -0
  23. app/utils/data_preprocessing.py +108 -0
  24. app/utils/feature_engineering.py +118 -0
  25. app/utils/recommendation_utils.py +66 -0
  26. app/utils/scrapers/__pycache__/allrecipes_scraper.cpython-312.pyc +0 -0
  27. app/utils/scrapers/__pycache__/base_scraper.cpython-312.pyc +0 -0
  28. app/utils/scrapers/__pycache__/food_network_scraper.cpython-312.pyc +0 -0
  29. app/utils/scrapers/__pycache__/fooddotcom_scraper.cpython-312.pyc +0 -0
  30. app/utils/scrapers/__pycache__/google_scraper.cpython-312.pyc +0 -0
  31. app/utils/scrapers/__pycache__/wikimedia_scraper.cpython-312.pyc +0 -0
  32. app/utils/scrapers/allrecipes_scraper.py +38 -0
  33. app/utils/scrapers/base_scraper.py +38 -0
  34. app/utils/scrapers/food_network_scraper.py +39 -0
  35. app/utils/scrapers/fooddotcom_scraper.py +65 -0
  36. app/utils/scrapers/google_scraper.py +42 -0
  37. app/utils/scrapers/wikimedia_scraper.py +45 -0
  38. app/utils/similarity_calculation.py +22 -0
  39. config.py +6 -0
  40. form_data.json +0 -0
  41. precomputed/category_dummies.joblib +3 -0
  42. precomputed/combined_matrix.npz +3 -0
  43. precomputed/df.joblib +3 -0
  44. precomputed/scaler.joblib +3 -0
  45. precomputed/tfidf_vectorizer_ingredients.joblib +3 -0
  46. precomputed/tfidf_vectorizer_keywords.joblib +3 -0
  47. precomputed/tfidf_vectorizer_keywords_name.joblib +3 -0
  48. recipe_dataset.csv +3 -0
  49. run.py +11 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ recipe_dataset.csv filter=lfs diff=lfs merge=lfs -text
app/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask
2
+ from app.api.routes import api_bp
3
+ from app.services.recommendation import FlexibleRecipeRecommendationSystem
4
+ from config import Config
5
+
6
def create_app(config_object=Config):
    """Application factory for the recipe-recommendation service.

    Loads configuration from *config_object*, builds the recommendation
    system from the configured dataset/precomputed paths, attaches it to
    the app object, and registers the API blueprint.

    Args:
        config_object: configuration class/object for Flask's
            ``config.from_object`` (defaults to ``Config``).

    Returns:
        The fully wired :class:`flask.Flask` application.
    """
    flask_app = Flask(__name__)
    flask_app.config.from_object(config_object)

    # Build the recommender once at startup and share it through the app
    # object; request handlers reach it via current_app.recommendation_system.
    csv_path = flask_app.config['CSV_FILE_PATH']
    precomputed_dir = flask_app.config['PRECOMPUTED_DIR']
    flask_app.recommendation_system = FlexibleRecipeRecommendationSystem(
        csv_path,
        precomputed_dir,
    )

    flask_app.register_blueprint(api_bp)

    return flask_app
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (918 Bytes). View file
 
app/api/__pycache__/routes.cpython-312.pyc ADDED
Binary file (6.45 kB). View file
 
app/api/routes.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, Response, request, jsonify, current_app
2
+ from app.models.recipe import Recipe
3
+ import json
4
+ import asyncio
5
+ from app.services import extraction
6
+ from app.services import image_query
7
+
8
+ api_bp = Blueprint('api', __name__)
9
+
10
@api_bp.route('/form-data', methods=['GET'])
def get_form_data():
    """Serve the static form configuration stored in ``form_data.json``.

    Returns:
        JSON payload of the file's contents, or a 404 error payload when the
        file is missing (instead of an unhandled FileNotFoundError / bare 500).
    """
    try:
        # Explicit encoding avoids platform-dependent default codecs.
        with open('form_data.json', 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        return jsonify({"error": "form_data.json not found"}), 404
    return jsonify(data)
15
+
16
@api_bp.route('/recommend', methods=['POST'])
async def recommend_recipes():
    """Return recipe recommendations for explicit filter criteria.

    Expects a JSON body with optional keys: category, dietary_preference,
    ingredients, calories, time, keywords, keywords_name.

    Returns:
        JSON list of recipe dicts, or a 400 error payload for a missing /
        malformed body or non-integer calories/time.
    """
    # get_json(silent=True) yields None instead of raising on a missing or
    # malformed JSON body, so we can answer with a clean 400 rather than an
    # unhandled AttributeError on data.get (request.json can be None).
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No data provided"}), 400

    category = data.get('category')
    dietary_preference = data.get('dietary_preference')
    ingredients = data.get('ingredients', [])
    calories = data.get('calories')
    time = data.get('time')
    keywords = data.get('keywords', [])
    keywords_name = data.get('keywords_name', [])

    # Normalise the numeric filters; TypeError covers non-string/non-number
    # JSON values (lists, dicts) that int() also rejects.
    try:
        if calories is not None:
            calories = int(calories)
        if time is not None:
            time = int(time)
    except (ValueError, TypeError):
        return jsonify({"error": "Calories and time must be integers if provided"}), 400

    # The recommendation system is attached to the app in create_app().
    recommendations = await current_app.recommendation_system.get_recommendations(
        category=category,
        dietary_preference=dietary_preference,
        ingredients=ingredients,
        calories=calories,
        time=time,
        keywords=keywords,
        keywords_name=keywords_name
    )

    # Recipe is a plain dataclass, so vars() yields a JSON-serialisable dict.
    return jsonify([vars(recipe) for recipe in recommendations])
47
+
48
@api_bp.route('/extract-recipe-attributes', methods=['POST'])
async def recommend_recipes2():
    """Extract recipe attributes from free text and return recommendations.

    Expects a JSON body with a ``text`` key; runs the LLM extractor over it,
    then feeds the extracted attributes to the recommendation system.
    """
    try:
        payload = request.get_json()
        if not payload:
            return jsonify({"error": "No data provided"}), 400

        raw_text = payload.get('text')
        if not raw_text:
            return jsonify({"error": "No search text provided"}), 400

        # Turn the free text into structured attributes via the LLM extractor.
        attributes = extraction.extract_recipe_attributes(raw_text)

        # The extractor reports failures as an {"error": ...} dict.
        if 'error' in attributes:
            return jsonify(attributes), 500

        raw_calories = attributes.get('calories', None)
        raw_time = attributes.get('time', None)

        # Coerce numeric attributes; empty strings / None become None.
        try:
            calories = int(raw_calories) if raw_calories else None
            time = int(raw_time) if raw_time else None
        except (ValueError, TypeError):
            return jsonify({"error": "Invalid calories or time value"}), 400

        recommendations = await current_app.recommendation_system.get_recommendations(
            category=attributes.get('category', ''),
            ingredients=[],  # Adjust if you plan to add ingredients in the extraction function
            calories=calories,
            time=time,
            keywords=attributes.get('keywords', []),
            keywords_name=attributes.get('keywords_name', [])
        )

        # Dataclass instances serialise cleanly through vars().
        return jsonify([vars(recipe) for recipe in recommendations])

    except Exception as e:
        return jsonify({"error": str(e)}), 500
97
+
98
# searchImage
@api_bp.route('/analyze-food-image', methods=['POST'])
async def handle_analyze_food_image():
    """Analyse an uploaded food image and return matching recommendations.

    Expects a multipart upload under the ``image`` field; describes the image
    with the vision model, mines the description for recipe attributes, then
    queries the recommendation system.
    """
    try:
        # Validate the multipart upload before doing any work.
        if 'image' not in request.files:
            return jsonify({"error": "No image file provided"}), 400

        upload = request.files['image']
        if upload.filename == '':
            return jsonify({"error": "No selected file"}), 400

        # Describe the image, then mine the description for attributes.
        description = image_query.analyze_food_image(upload)
        attributes = extraction.extract_recipe_attributes(description)

        # The extractor reports failures as an {"error": ...} dict.
        if 'error' in attributes:
            return jsonify(attributes), 500

        raw_calories = attributes.get('calories', None)
        raw_time = attributes.get('time', None)

        # Coerce numeric attributes; empty strings / None become None.
        try:
            calories = int(raw_calories) if raw_calories else None
            time = int(raw_time) if raw_time else None
        except (ValueError, TypeError):
            return jsonify({"error": "Invalid calories or time value"}), 400

        recommendations = await current_app.recommendation_system.get_recommendations(
            category=attributes.get('category', ''),
            ingredients=[],  # Adjust if you plan to add ingredients in the extraction function
            calories=calories,
            time=time,
            keywords=attributes.get('keywords', []),
            keywords_name=attributes.get('keywords_name', [])
        )

        # Dataclass instances serialise cleanly through vars().
        return jsonify([vars(recipe) for recipe in recommendations])

    except Exception as e:
        return jsonify({"error": str(e)}), 500
151
+
app/main.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from app import create_app
2
+
3
+ app = create_app()
app/models/__pycache__/recipe.cpython-312.pyc ADDED
Binary file (1 kB). View file
 
app/models/recipe.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List
3
+
4
@dataclass
class Recipe:
    """A single recipe row as returned by the recommendation system.

    Field names mirror the dataset's column names, hence the PascalCase
    attribute names; vars(recipe) is used by the API layer to serialise
    instances to JSON.
    """
    RecipeId: int
    Name: str
    RecipeCategory: str
    RecipeIngredientParts: List[str]
    Keywords: List[str]
    keywords_name: List[str]  # app-generated name words (snake_case: not a dataset column)
    Calories: float
    TotalTime_minutes: int
    AggregatedRating: float
    ReviewCount: int
    Description: str
    RecipeIngredientQuantities: List[str]
    RecipeInstructions: List[str]
    Images: List[str]
    Similarity: float  # similarity score assigned during ranking, not from the dataset
app/services/__pycache__/extraction.cpython-312.pyc ADDED
Binary file (19.5 kB). View file
 
app/services/__pycache__/image_query.cpython-312.pyc ADDED
Binary file (2.38 kB). View file
 
app/services/__pycache__/image_search.cpython-312.pyc ADDED
Binary file (7.84 kB). View file
 
app/services/__pycache__/recommendation.cpython-312.pyc ADDED
Binary file (2.09 kB). View file
 
app/services/extraction.py ADDED
@@ -0,0 +1,701 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import json
3
+ from difflib import get_close_matches
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from difflib import SequenceMatcher
7
+
8
+ load_dotenv()
9
+ openai.api_key = os.getenv("OPENAI_API_KEY")
10
+
11
+ # Define categories from dataset
12
+ RECIPE_CATEGORIES = [
13
+ "frozen desserts",
14
+ "chicken breast",
15
+ "beverages",
16
+ "soy/tofu",
17
+ "vegetable",
18
+ "pie",
19
+ "chicken",
20
+ "dessert",
21
+ "southwestern u.s.",
22
+ "sauces",
23
+ "stew",
24
+ "black beans",
25
+ "< 60 mins",
26
+ "lactose free",
27
+ "yeast breads",
28
+ "whole chicken",
29
+ "cheesecake",
30
+ "free of...",
31
+ "brazilian",
32
+ "breakfast",
33
+ "breads",
34
+ "bar cookie",
35
+ "brown rice",
36
+ "oranges",
37
+ "pork",
38
+ "low protein",
39
+ "asian",
40
+ "potato",
41
+ "cheese",
42
+ "halibut",
43
+ "meat",
44
+ "lamb/sheep",
45
+ "very low carbs",
46
+ "spaghetti",
47
+ "scones",
48
+ "drop cookies",
49
+ "lunch/snacks",
50
+ "beans",
51
+ "punch beverage",
52
+ "pineapple",
53
+ "quick breads",
54
+ "sourdough breads",
55
+ "curries",
56
+ "chicken livers",
57
+ "coconut",
58
+ "savory pies",
59
+ "poultry",
60
+ "steak",
61
+ "healthy",
62
+ "rice",
63
+ "apple",
64
+ "spreads",
65
+ "crab",
66
+ "jellies",
67
+ "pears",
68
+ "chowders",
69
+ "cauliflower",
70
+ "candy",
71
+ "chutneys",
72
+ "white rice",
73
+ "tex mex",
74
+ "bass",
75
+ "fruit",
76
+ "european",
77
+ "smoothies",
78
+ "manicotti",
79
+ "onions",
80
+ "new zealand",
81
+ "chicken thigh & leg",
82
+ "indonesian",
83
+ "greek",
84
+ "corn",
85
+ "lentil",
86
+ "long grain rice",
87
+ "southwest asia (middle east)",
88
+ "spanish",
89
+ "dutch",
90
+ "gelatin",
91
+ "tuna",
92
+ "citrus",
93
+ "berries",
94
+ "peppers",
95
+ "salad dressings",
96
+ "clear soup",
97
+ "mexican",
98
+ "raspberries",
99
+ "crawfish",
100
+ "beef organ meats",
101
+ "lobster",
102
+ "strawberry",
103
+ "shakes",
104
+ "short grain rice",
105
+ "< 15 mins",
106
+ "german",
107
+ "one dish meal",
108
+ "thai",
109
+ "cajun",
110
+ "russian",
111
+ "melons",
112
+ "swiss",
113
+ "papaya",
114
+ "veal",
115
+ "orange roughy",
116
+ "canadian",
117
+ "caribbean",
118
+ "mussels",
119
+ "medium grain rice",
120
+ "japanese",
121
+ "penne",
122
+ "elk",
123
+ "colombian",
124
+ "gumbo",
125
+ "roast beef",
126
+ "perch",
127
+ "vietnamese",
128
+ "rabbit",
129
+ "lebanese",
130
+ "turkish",
131
+ "kid friendly",
132
+ "whole turkey",
133
+ "chinese",
134
+ "grains",
135
+ "yam/sweet potato",
136
+ "meatloaf",
137
+ "trout",
138
+ "african",
139
+ "ham",
140
+ "goose",
141
+ "pasta shells",
142
+ "stocks",
143
+ "meatballs",
144
+ "whole duck",
145
+ "scandinavian",
146
+ "greens",
147
+ "catfish",
148
+ "duck breasts",
149
+ "polish",
150
+ "deer",
151
+ "wild game",
152
+ "pheasant",
153
+ "hungarian",
154
+ "no shell fish",
155
+ "collard greens",
156
+ "tilapia",
157
+ "quail",
158
+ "moroccan",
159
+ "squid",
160
+ "korean",
161
+ "plums",
162
+ "danish",
163
+ "creole",
164
+ "mahi mahi",
165
+ "tarts",
166
+ "hawaiian",
167
+ "austrian",
168
+ "moose",
169
+ "native american",
170
+ "swedish",
171
+ "norwegian",
172
+ "ethiopian",
173
+ "belgian",
174
+ "australian",
175
+ "bear",
176
+ "scottish",
177
+ "tempeh",
178
+ "cuban",
179
+ "spinach",
180
+ "turkey breasts",
181
+ "cantonese",
182
+ "tropical fruits",
183
+ "peanut butter",
184
+ "szechuan",
185
+ "portuguese",
186
+ "costa rican",
187
+ "duck",
188
+ "nuts",
189
+ "filipino",
190
+ "pot pie",
191
+ "polynesian",
192
+ "mango",
193
+ "cherries",
194
+ "egyptian",
195
+ "chard",
196
+ "lime",
197
+ "lemon",
198
+ "kiwifruit",
199
+ "whitefish",
200
+ "south american",
201
+ "malaysian",
202
+ "octopus",
203
+ "nigerian",
204
+ "south african",
205
+ "nepalese",
206
+ "palestinian",
207
+ "czech",
208
+ "avocado",
209
+ "iraqi",
210
+ "pakistani",
211
+ "chocolate chip cookies",
212
+ "finnish",
213
+ "puerto rican",
214
+ "cambodian",
215
+ "honduran",
216
+ "mongolian",
217
+ "peruvian",
218
+ "turkey gravy",
219
+ "somalian",
220
+ "ice cream",
221
+ "oatmeal",
222
+ "artichoke",
223
+ "indian",
224
+ "grapes",
225
+ "macaroni and cheese",
226
+ "mashed potatoes",
227
+ "pumpkin",
228
+ "guatemalan"
229
+ ]
230
+
231
def find_closest_category(category, categories=None):
    """Map a free-form category string onto the dataset's category vocabulary.

    Matching strategy, in order:
      1. case-insensitive exact match on the whole string;
      2. per-word match for compound inputs: for each word, prefer a category
         equal to that word, then any category containing it as a substring;
      3. fuzzy match via difflib (accepted only above 0.8 similarity).

    Args:
        category: raw category text (may be empty or None).
        categories: optional vocabulary override; defaults to the module-level
            RECIPE_CATEGORIES list.

    Returns:
        The matched category string, or "" when nothing matches.
    """
    if not category:
        return ""

    cats = RECIPE_CATEGORIES if categories is None else categories
    lowered = category.lower()
    # Map lowercase form back to the canonical spelling once, instead of
    # rebuilding [c.lower() for c in ...] on every check.
    lower_map = {c.lower(): c for c in cats}

    # 1. Exact (case-insensitive) match on the whole string.
    if lowered in lower_map:
        return lower_map[lowered]

    # 2. Per-word matching for compound inputs like "chicken soup".
    for part in lowered.split():
        # Prefer an exact single-word category ("chicken") over a substring
        # hit ("chicken breast"). The previous version returned the first
        # substring match in list order, skipping the exact category.
        if part in lower_map:
            return lower_map[part]
        substring_hits = [c for c in cats if part in c.lower()]
        if substring_hits:
            return substring_hits[0]

    # 3. Fuzzy fallback: candidate at cutoff 0.75, accepted only above 0.8.
    close = get_close_matches(lowered, list(lower_map), n=1, cutoff=0.75)
    if close and SequenceMatcher(None, lowered, close[0]).ratio() > 0.8:
        return lower_map[close[0]]

    # No acceptable match at all.
    return ""
263
+
264
def _build_extraction_messages(text):
    """Build the few-shot chat prompt asking the model for recipe attributes."""
    return [
        {"role": "system", "content": "You are an assistant that extracts recipe attributes from user input. If the input contains an uncommon or unrecognized category, add relevant general keywords based on common culinary types, such as 'beverages' for drinks, 'dessert' for sweets, etc."},
        {"role": "user", "content": f"""
From the given text, identify:
- **category**: The main name or type of the recipe (like "chicken", "ice cream").
- **calories**: Number of calories, if mentioned.
- **time**: Time to cook, in minutes.
- **keywords**: Important words related to the recipe. If the category is not common (like "noodles" or "biryani"), include relevant characteristics (e.g., "asian", "main course", "stir fry", "quick meal", "wheat based", "high protein", etc).
- **keywords_name**: List of individual words from the category/name. For uncommon categories, include descriptive terms and related categories (e.g., for "noodles": ["asian", "pasta", "wheat", "main dish"]).

Examples:
---
Input: "noodles"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["asian", "stir fry", "wheat based", "quick meal", "main course", "pasta", "noodles"],
    "keywords_name": ["asian", "pasta", "main dish", "wheat"]
}}

---
Input: "biryani"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["rice", "indian", "spicy", "main course", "one dish meal", "biryani"],
    "keywords_name": ["rice", "indian", "spicy"]
}}

---
Input: "sushi"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["japanese", "rice", "seafood", "whitefish", "snack", "main course", "sushi"],
    "keywords_name": ["japanese", "seafood", "rice"]
}}

---
Input: "vegetable curry"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["vegan", "vegetarian", "spicy", "main course", "curry", "indian"],
    "keywords_name": ["indian", "vegetarian", "spicy"]
}}

---
Input: "quinoa salad"
Output: {{
    "category": "salad dressings",
    "calories": "",
    "time": "",
    "keywords": ["healthy", "salad", "gluten-free", "fiber", "low calorie", "vegan"],
    "keywords_name": ["healthy", "salad", "vegan"]
}}

---
Input: "beef tacos"
Output: {{
    "category": "beef organ meats",
    "calories": "",
    "time": "",
    "keywords": ["mexican", "beef", "spicy", "snack", "tortilla", "street food"],
    "keywords_name": ["mexican", "beef", "snack"]
}}

---
Input: "caesar salad"
Output: {{
    "category": "salad dressings",
    "calories": "",
    "time": "",
    "keywords": ["salad", "appetizer", "healthy", "vegetables", "parmesan", "croutons"],
    "keywords_name": ["salad", "appetizer", "healthy"]
}}

---
Input: "smoothie bowl"
Output: {{
    "category": "smoothies",
    "calories": "",
    "time": "",
    "keywords": ["breakfast", "healthy", "fruits", "smoothies", "vegan", "fiber"],
    "keywords_name": ["breakfast", "healthy", "fruits"]
}}

Input: "spaghetti bolognese"
Output: {{
    "category": "spaghetti",
    "calories": "",
    "time": "",
    "keywords": ["italian", "pasta", "meat", "tomato", "main course", "hearty"],
    "keywords_name": ["italian", "pasta", "meat"]
}}

---
Input: "I wish to cook chicken soup which contains around 200 calories within 30 mins"
Output: {{
    "category": "chicken",
    "calories": "200",
    "time": "30",
    "keywords": ["chicken", "soup", "200 calories", "30 mins"],
    "keywords_name": ["chicken", "soup"]
}}

---
Input: "Quick pasta recipe with 500 calories, ready in 20 mins"
Output: {{
    "category": "pasta shells",
    "calories": "500",
    "time": "20",
    "keywords": ["pasta shells", "500 calories", "20 mins"],
    "keywords_name": ["pasta shells"]
}}

---
Input: "uh i wish to cook something which contains protein"
Output: {{
    "category": "low protein",
    "calories": "",
    "time": "",
    "keywords": ["low protein", "high protein", "protein"],
    "keywords_name": ["low protein"]
}}

---
Input: "can you suggest something with low calories"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["low calories"],
    "keywords_name": ["low", "calories"]
}}

---
Input: "looking for a vegetarian recipe"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["vegetarian", "vegan"],
    "keywords_name": ["vegetarian"]
}}

---
Input: "need something gluten free"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["gluten free"],
    "keywords_name": ["gluten", "free"]
}}

---
Input: "want to make something dairy free"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["dairy free", "vegan"],
    "keywords_name": ["dairy", "free"]
}}

---
Input: "what can i cook for dinner"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["dinner", "vegan"],
    "keywords_name": [""]
}}

---
Input: "what can i cook for breakfast"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["breakfast", "vegan"],
    "keywords_name": [""]
}}

---
Input: "what can i cook for lunch"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["lunch", "quick meal", "vegan"],
    "keywords_name": [""]
}}

---
Input: "something with low carbs"
Output: {{
    "category": "very low carbs",
    "calories": "",
    "time": "",
    "keywords": ["very low carbs", "low carbs", "carbs"],
    "keywords_name": ["low", "carbs"]
}}

---
Input: "i wish to cook something in 30 minutes"
Output: {{
    "category": "",
    "calories": "",
    "time": "30",
    "keywords": ["30 minutes", "quick meal"],
    "keywords_name": [""]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "stew",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "catfish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "whitefish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "crawfish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "give some recipes involving almonds or dry fruits"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["almonds", "dry fruits"],
    "keywords_name": ["almonds", "dry fruits"]
}}

---
Input: "tea with milk, sugar, water"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["milk", "sugar", "water", "beverages"],
    "keywords_name": [""]
}}

---
Input: "chole bhature"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["gluten free"],
    "keywords_name": ["gluten", "free"]
}}

---
Input: "something involving nuts"
Output: {{
    "category": "nuts",
    "calories": "",
    "time": "",
    "keywords": ["nuts", "snack", "healthy", "protein", "fiber"],
    "keywords_name": ["nuts", "snack", "healthy"]
}}

---
Now process this input:
Input: "{text}"
Output:
"""}
    ]


# Ordered (trigger terms, extra keywords, extra keywords_name) rules applied
# when the model's category could not be matched against the dataset.
# First matching rule wins, mirroring the original elif chain's ordering
# (e.g. "smoothie bowl" must be tested before plain "smoothie").
_KEYWORD_ENRICHMENT_RULES = [
    (("coffee", "latte"), ["coffee", "beverages", "caffeinated", "hot drink"], ["beverages", "caffeinated", "coffee"]),
    (("espresso",), ["beverages", "caffeinated", "espresso"], ["beverages", "espresso"]),
    (("smoothie bowl",), ["beverages", "healthy", "smoothie bowl"], ["beverages", "smoothie bowl"]),
    (("kombucha",), ["beverage", "fermented", "kombucha"], ["beverages", "kombucha"]),
    (("herbal tea",), ["beverages", "caffeine-free", "herbal tea"], ["beverages", "herbal tea"]),
    (("seaweed",), ["ingredient", "seafood", "seaweed"], ["seaweed"]),
    (("vegan cheese",), ["dairy-free", "vegan", "cheese"], ["vegan cheese"]),
    (("air fryer",), ["cooking method", "air fryer", "healthy"], ["air fryer"]),
    (("instant pot",), ["cooking method", "instant pot", "pressure cooker"], ["instant pot"]),
    (("sous vide",), ["cooking method", "sous vide", "precision cooking"], ["sous vide"]),
    (("paleo",), ["diet", "paleo", "low-carb"], ["paleo"]),
    (("fodmap",), ["diet", "fodmap", "digestive health"], ["fodmap"]),
    (("cold brew",), ["beverages", "caffeinated", "cold coffee"], ["beverages", "cold brew"]),
    (("matcha",), ["beverages", "green tea", "matcha"], ["beverages", "matcha"]),
    (("smoothie",), ["beverages", "healthy", "smoothie"], ["beverages", "smoothie"]),
    (("protein shake",), ["beverages", "high protein", "shake"], ["beverages", "protein shake"]),
    # "oat milk" / "almond milk" are split into two rules: the original picked
    # the keywords_name label from "oat" being anywhere in the text, which
    # mislabelled e.g. "oatmeal with almond milk" as "oat milk".
    (("oat milk",), ["dairy-free", "vegan", "plant-based milk"], ["oat milk"]),
    (("almond milk",), ["dairy-free", "vegan", "plant-based milk"], ["almond milk"]),
    (("zoodles",), ["low carb", "gluten-free", "vegetable noodles", "noodles"], ["zoodles", "noodles"]),
    (("avocado toast",), ["breakfast", "healthy", "avocado"], ["avocado toast"]),
    (("golden milk",), ["beverage", "turmeric", "anti-inflammatory"], ["golden milk"]),
    # Add other cases as needed
]


def _enrich_unmatched_keywords(result, text):
    """Append context-based keywords for special terms when no category matched.

    Mutates *result* in place; only the first matching rule is applied.
    """
    lowered = text.lower()  # hoisted: the original called text.lower() per branch
    for triggers, extra_keywords, extra_names in _KEYWORD_ENRICHMENT_RULES:
        if any(term in lowered for term in triggers):
            result["keywords"] = result.get("keywords", []) + extra_keywords
            result["keywords_name"] = result.get("keywords_name", []) + extra_names
            break


def extract_recipe_attributes(text):
    """Extract structured recipe attributes from free text via the OpenAI API.

    Sends *text* with a few-shot prompt, parses the JSON reply, snaps the
    returned category onto the dataset vocabulary with find_closest_category(),
    and enriches the keyword lists for a set of special terms when no category
    could be matched.

    Args:
        text: free-form user input describing the desired recipe.

    Returns:
        A dict with keys "category", "calories", "time", "keywords",
        "keywords_name"; or {"error": ..., "output": ...} when the model reply
        is not valid JSON.
    """
    messages = _build_extraction_messages(text)

    # NOTE(review): legacy pre-1.0 openai interface; temperature=0 keeps the
    # extraction deterministic.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=150,  # NOTE(review): very long keyword lists could be truncated here
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    output_text = response['choices'][0]['message']['content'].strip()

    try:
        result = json.loads(output_text)
    except json.JSONDecodeError:
        return {"error": "Failed to parse JSON", "output": output_text}

    # Snap the model's category onto the dataset vocabulary.
    # .get(): the model may omit the key entirely (previously a KeyError).
    original_category = result.get("category", "")
    matched_category = find_closest_category(original_category)

    if matched_category:
        result["category"] = matched_category
        if original_category != matched_category:
            # Category was corrected: rebuild keywords_name from its words.
            result["keywords_name"] = matched_category.split()
    else:
        result["category"] = ""
        _enrich_unmatched_keywords(result, text)

    return result
667
+
668
# Example usage / manual smoke test (requires a valid OPENAI_API_KEY).
if __name__ == '__main__':
    # Historical test inputs ("noodles", "biryani", "chole bhature",
    # "latte with foam, coffee, milk", "beef stew with potatoes, carrots,
    # and herbs.", ...) were commented out; only "basil" remained active.
    sample_inputs = [
        "basil",
    ]

    for sample in sample_inputs:
        print(f"\nTesting: {sample}")
        extracted = extract_recipe_attributes(sample)
        print(json.dumps(extracted, indent=2))
app/services/image_query.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
import google.generativeai as genai
import PIL.Image
import io
import os
from dotenv import load_dotenv

# NOTE(review): a Flask app is created here but no routes are registered in
# this module and __main__ is a no-op — confirm whether `app` is imported
# elsewhere or is dead scaffolding.
app = Flask(__name__)
load_dotenv()

# Configure Gemini API - get key from https://makersuite.google.com/app/apikey
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize the model - UPDATED MODEL NAME HERE
model = genai.GenerativeModel('gemini-1.5-flash')  # Changed from gemini-pro-vision

def analyze_food_image(image_content) -> str:
    """
    Analyze image using Gemini API and return food description.

    Args:
        image_content: A file-like object supporting .read() that yields the
            raw image bytes (e.g. a Flask/werkzeug uploaded file stream).

    Returns:
        A lowercase comma-separated description ("dish, ingredient, ...") with
        quote characters stripped; "food dish" when the model returns nothing;
        on failure, a "food dish (Error: ...)" string embedding the exception
        text (callers receive this as a normal description, never an exception).
    """
    try:
        prompt = """
        Look at this food image and:
        1. Identify the main dish/food item
        2. List visible ingredients or components, including individual words/strings of the main dish
        3. Return ONLY a simple description in this format: [main dish], [ingredients]
        For example: "pizza, pizza, cheese, tomatoes, basil" or "chocolate cake, chocolate, cake, frosting, berries"
        """

        # Convert bytes to PIL Image
        image_bytes = image_content.read()
        image = PIL.Image.open(io.BytesIO(image_bytes))

        # Generate response
        response = model.generate_content([prompt, image])

        # Clean and format the response: normalise case and drop quote marks
        # the model tends to echo from the prompt's examples.
        description = response.text.strip().lower()
        description = description.replace('"', '').replace("'", '')

        print(description)  # For testing purpose

        return description if description else "food dish"

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        return f"food dish (Error: {str(e)})"

if __name__ == '__main__':
    pass
app/services/image_search.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import asyncio
3
+ import aiohttp
4
+ import random
5
+ import re
6
+ from typing import List, Union
7
+ from app.utils.scrapers.google_scraper import GoogleScraper
8
+ from app.utils.scrapers.food_network_scraper import FoodNetworkScraper
9
+ from app.utils.scrapers.allrecipes_scraper import AllRecipesScraper
10
+ from app.utils.scrapers.wikimedia_scraper import WikimediaScraper
11
+ from app.utils.scrapers.fooddotcom_scraper import FoodDotComScraper
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class ImageSearchService:
    """Resolves image URLs for a recipe.

    Resolution order:
      1. URLs already present in the dataset's image column.
      2. Concurrent scraping of several recipe/image sites.
      3. Static placeholder images as a last resort.

    Use as an async context manager so the shared aiohttp session is opened
    before lookups and closed afterwards.
    """

    def __init__(self):
        self.scrapers = [
            GoogleScraper(),
            FoodNetworkScraper(),
            AllRecipesScraper(),
            WikimediaScraper(),
            FoodDotComScraper()
        ]
        # Shared aiohttp.ClientSession; created lazily in __aenter__.
        self.session = None
        self.placeholder_images = [
            "https://drive.google.com/file/d/1gYOjs06yiq7EUXaO19BE-L7MkrTR6wlc/view?usp=sharing",
            "https://drive.google.com/file/d/1ob4KbzVLtwsE_ckYKBu_70FLEXNCJRSr/view?usp=sharing",
            "https://drive.google.com/file/d/1UUv3zF1ouXteZVt8Oc_UXORcJrlWfRXR/view?usp=sharing"
        ]

    async def __aenter__(self):
        # Create one session and hand it to every scraper so they share
        # connection pooling.
        if self.session is None:
            self.session = aiohttp.ClientSession()
            for scraper in self.scrapers:
                scraper.session = self.session
            logger.info("ImageSearchService session initialized")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()
            self.session = None
            logger.info("ImageSearchService session closed")

    @staticmethod
    def _task_name(task):
        # Coroutine objects normally expose __name__, but guard with getattr
        # so a logging call can never itself raise inside error handling.
        return getattr(task.get_coro(), '__name__', repr(task))

    async def search_recipe_images(self, recipe_name: str, image_data: Union[str, float, int], num_images: int = 3) -> List[str]:
        """Return up to `num_images` image URLs for `recipe_name`.

        Args:
            recipe_name: Search query text.
            image_data: Raw value of the dataset's image column (R-style
                c("...") vector, bare URL string, 'NA', NaN, ...).
            num_images: Maximum number of URLs to return.
        """
        logger.info(f"Searching images for recipe: {recipe_name}")

        # First try to get existing URLs from the database
        existing_urls = self.extract_urls_from_image_column(image_data)
        if existing_urls:
            logger.info(f"Found {len(existing_urls)} existing URLs")
            return existing_urls[:num_images]

        try:
            # Fan out to all scrapers concurrently with an overall timeout.
            tasks = [asyncio.create_task(scraper.search_images(recipe_name, num_images))
                     for scraper in self.scrapers]
            logger.info(f"Created {len(tasks)} scraper tasks")

            done, pending = await asyncio.wait(tasks, timeout=60)

            for task in pending:
                logger.warning(f"Cancelling pending task for {self._task_name(task)}")
                task.cancel()

            all_results = []
            for task in done:
                try:
                    results = await task
                    logger.info(f"Scraper {self._task_name(task)} found {len(results)} images")
                    all_results.extend(results)
                except Exception as e:
                    logger.error(f"Error in scraper task {self._task_name(task)}: {str(e)}")

            # De-duplicate while preserving discovery order.
            unique_results = list(dict.fromkeys(all_results))

            if unique_results:
                logger.info(f"Found {len(unique_results)} unique image URLs")
                return unique_results[:num_images]

            # No scraper hits: fall back to placeholders — distinct ones first,
            # then repeats once the pool is exhausted. (Simplifies the previous
            # retry-until-unseen random.choice loop.)
            logger.info("No images found, using placeholder images")
            selected_placeholders = random.sample(
                self.placeholder_images,
                min(num_images, len(self.placeholder_images))
            )
            while len(selected_placeholders) < num_images:
                selected_placeholders.append(random.choice(self.placeholder_images))
            return selected_placeholders

        except Exception as e:
            logger.error(f"Error in image search: {str(e)}")
            # Return placeholder images even in case of error
            return random.sample(self.placeholder_images, min(num_images, len(self.placeholder_images)))

    def extract_urls_from_image_column(self, image_data: Union[str, float, int]) -> List[str]:
        """Parse URLs out of the dataset's image column value; [] when absent."""
        logger.debug(f"Extracting URLs from image data: {image_data}")
        # None, the literal 'NA', and numeric sentinels (incl. NaN) mean "no image".
        if image_data is None or image_data == 'NA' or isinstance(image_data, (float, int)):
            logger.debug("No valid image data found in database")
            return []

        try:
            image_data_str = str(image_data)
            if image_data_str.startswith('c(') and image_data_str.endswith(')'):
                # R-style character vector: c("url1", "url2", ...)
                content = image_data_str[2:-1].strip()
                parts = re.findall(r'"([^"]*)"', content)
                urls = [url for url in parts if url.startswith('http')]
            else:
                urls = re.findall(r'https?://[^\s,"\')]+', image_data_str)

            logger.info(f"Extracted {len(urls)} URLs from image column")
            return urls
        except Exception as e:
            logger.error(f"Error extracting URLs from image data: {str(e)}")
            return []
app/services/recommendation.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from app.services.image_search import ImageSearchService
3
+ from app.utils.data_loading import load_or_create_data
4
+ from app.utils.recommendation_utils import get_top_recommendations
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class FlexibleRecipeRecommendationSystem:
    """Facade tying together artefact loading, similarity search and image lookup."""

    def __init__(self, csv_file_path, precomputed_dir):
        # Relative importance of each feature family in the combined vector.
        self.feature_weights = {
            'ingredients': 0.15,
            'category': 0.25,
            'dietary': 0.20,
            'calories': 0.10,
            'time': 0.10,
            'keywords': 0.10,
            'keywords_name': 0.10,
        }
        self.image_search_service = ImageSearchService()
        # Loads cached artefacts when available, otherwise builds and saves them.
        self.data = load_or_create_data(csv_file_path, precomputed_dir, self.feature_weights)

    async def get_recommendations(self, category=None, dietary_preference=None, ingredients=None,
                                  calories=None, time=None, keywords=None, keywords_name=None, top_n=6):
        """Delegate to get_top_recommendations with the precomputed artefacts."""
        artefacts = self.data
        return await get_top_recommendations(
            artefacts['df'],
            artefacts['combined_matrix'],
            artefacts['tfidf_vectorizer_ingredients'],
            artefacts['tfidf_vectorizer_keywords'],
            artefacts['tfidf_vectorizer_keywords_name'],
            artefacts['category_dummies'],
            artefacts['scaler'],
            self.feature_weights,
            self.image_search_service,
            category, dietary_preference, ingredients,
            calories, time, keywords, keywords_name, top_n
        )
app/utils/__pycache__/data_loading.cpython-312.pyc ADDED
Binary file (2.95 kB). View file
 
app/utils/__pycache__/data_preprocessing.cpython-312.pyc ADDED
Binary file (4.73 kB). View file
 
app/utils/__pycache__/feature_engineering.cpython-312.pyc ADDED
Binary file (6.39 kB). View file
 
app/utils/__pycache__/recommendation_utils.cpython-312.pyc ADDED
Binary file (3.45 kB). View file
 
app/utils/__pycache__/similarity_calculation.cpython-312.pyc ADDED
Binary file (1.31 kB). View file
 
app/utils/data_loading.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ from scipy.sparse import save_npz, load_npz
4
+ import pandas as pd
5
+ from app.utils.data_preprocessing import preprocess_data
6
+ from app.utils.feature_engineering import create_feature_matrices
7
+
8
def load_or_create_data(csv_file_path, precomputed_dir, feature_weights):
    """Return the recommendation artefacts, using the on-disk cache when complete."""
    joblib_names = ['df', 'tfidf_vectorizer_ingredients', 'tfidf_vectorizer_keywords',
                    'tfidf_vectorizer_keywords_name', 'category_dummies', 'scaler']

    have_joblibs = all(
        os.path.exists(os.path.join(precomputed_dir, f'{name}.joblib'))
        for name in joblib_names
    )
    have_matrix = os.path.exists(os.path.join(precomputed_dir, 'combined_matrix.npz'))

    # Only trust the cache when every artefact is present; otherwise rebuild all.
    if have_joblibs and have_matrix:
        return load_precomputed_data(precomputed_dir)
    return compute_and_save_data(csv_file_path, precomputed_dir, feature_weights)
17
+
18
def load_precomputed_data(precomputed_dir):
    """Load previously persisted artefacts from *precomputed_dir*."""
    names = ('df', 'tfidf_vectorizer_ingredients', 'tfidf_vectorizer_keywords',
             'tfidf_vectorizer_keywords_name', 'category_dummies', 'scaler')
    data = {
        name: joblib.load(os.path.join(precomputed_dir, f'{name}.joblib'))
        for name in names
    }
    # The sparse combined matrix is stored separately in npz format.
    data['combined_matrix'] = load_npz(os.path.join(precomputed_dir, 'combined_matrix.npz'))
    return data
25
+
26
def compute_and_save_data(csv_file_path, precomputed_dir, feature_weights):
    """Build all recommendation artefacts from the CSV and persist them to disk."""
    df = preprocess_data(pd.read_csv(csv_file_path))

    (combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
     tfidf_vectorizer_keywords_name, category_dummies, scaler) = create_feature_matrices(df, feature_weights)

    os.makedirs(precomputed_dir, exist_ok=True)
    data = {
        'df': df,
        'tfidf_vectorizer_ingredients': tfidf_vectorizer_ingredients,
        'tfidf_vectorizer_keywords': tfidf_vectorizer_keywords,
        'tfidf_vectorizer_keywords_name': tfidf_vectorizer_keywords_name,
        'category_dummies': category_dummies,
        'scaler': scaler,
        'combined_matrix': combined_matrix
    }

    # Sparse matrix goes to npz; everything else is pickled via joblib.
    for name, artefact in data.items():
        if name == 'combined_matrix':
            save_npz(os.path.join(precomputed_dir, f'{name}.npz'), artefact)
        else:
            joblib.dump(artefact, os.path.join(precomputed_dir, f'{name}.joblib'))

    return data
app/utils/data_preprocessing.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import ast
4
+ import logging
5
+ import re
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
def parse_r_vector(s):
    """
    Parse R vector format strings like c("word1", "word2") into Python lists.

    Args:
        s: String in R vector format, an already-parsed list, or a missing
           value (None/NaN).

    Returns:
        List of strings; empty list for missing or unrecognised input.
    """
    # Bug fix: the list check must come BEFORE pd.isna — pd.isna(list) returns
    # an element-wise boolean array, which raises ValueError when used in `if`.
    if isinstance(s, list):
        return s
    if pd.isna(s):
        return []

    try:
        # Remove the c() wrapper and pull out each double-quoted item.
        if isinstance(s, str) and s.startswith('c(') and s.endswith(')'):
            # Extract content between c( and )
            content = s[2:-1].strip()

            # Use regex to properly split quoted strings
            pattern = r'"([^"]*)"'
            matches = re.findall(pattern, content)

            # Filter out empty strings and R's NA marker
            return [item.strip() for item in matches if item.strip() and item.lower() != 'na']
        return []
    except Exception as e:
        logger.warning(f"Error parsing R vector: {s}, Error: {str(e)}")
        return []
42
+
43
def preprocess_data(df):
    """
    Preprocess the dataframe by handling boolean, numerical, and list-like columns.
    """
    # Dietary flags arrive as TRUE/FALSE strings (or real bools); normalise to 0/1.
    flag_map = {'TRUE': 1, 'FALSE': 0, True: 1, False: 0}
    for flag_col in ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                     'is_low carb', 'is_keto', 'is_paleo']:
        df[flag_col] = df[flag_col].map(flag_map).fillna(0).astype(int)

    # Coerce numeric columns, imputing missing values with the column median.
    for num_col in ['Calories', 'TotalTime_minutes', 'AggregatedRating', 'ReviewCount']:
        coerced = pd.to_numeric(df[num_col], errors='coerce')
        df[num_col] = coerced.fillna(coerced.median())

    # Columns exported from R as c("...", "...") character vectors.
    for r_col in ['RecipeIngredientParts', 'RecipeInstructions', 'RecipeIngredientQuantities']:
        df[r_col] = df[r_col].apply(parse_r_vector)

    # Columns stored as Python-style list literals.
    for list_col in ['Keywords', 'keywords_name']:
        df[list_col] = df[list_col].apply(parse_list_string)

    return df
69
+
70
def parse_list_string(s):
    """
    Safely parse list-like strings (e.g. "['a', 'b']") into Python lists.

    Returns the input unchanged when it is already a list, [] for missing
    values, and wraps unparseable non-empty strings as a single-item list.
    """
    # Bug fix: check for lists BEFORE pd.isna — pd.isna(list) yields a boolean
    # array whose truth value raises ValueError.
    if isinstance(s, list):
        return s
    if pd.isna(s):
        return []
    try:
        if isinstance(s, str):
            parsed = ast.literal_eval(s)
            return parsed if isinstance(parsed, list) else [s]
        return []
    except (ValueError, SyntaxError):
        # Not a literal — treat a non-empty string as a one-element list.
        return [s] if s else []
85
+
86
def parse_recipe_ingredients(ingredient_parts):
    """
    Parse RecipeIngredientParts field handling R vector format.

    Thin alias for parse_r_vector, kept for call-site readability; returns a
    list of ingredient strings (empty for missing/unrecognised input).
    """
    return parse_r_vector(ingredient_parts)
91
+
92
def parse_list_field(field):
    """
    Parse a list field, handling various input types including R vectors.

    Accepts lists (returned unchanged), R-style c(...) vectors, Python list
    literals, and missing values; anything else yields [].
    """
    # Bug fix: the list check must come BEFORE pd.isna — pd.isna(list) returns
    # an element-wise boolean array that raises ValueError in `if`.
    if isinstance(field, list):
        return field
    if pd.isna(field):
        return []
    if isinstance(field, str):
        if field.startswith('c('):
            return parse_r_vector(field)
        try:
            parsed = ast.literal_eval(field)
            return parsed if isinstance(parsed, list) else []
        except (ValueError, SyntaxError):
            return []
    return []
app/utils/feature_engineering.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+ from sklearn.preprocessing import MinMaxScaler
3
+ from scipy.sparse import hstack
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
def create_feature_matrices(df, feature_weights):
    """
    Create feature matrices for the recommendation system.

    Builds one weighted, horizontally-stacked sparse matrix with layout:
    [ingredients tfidf | category dummies | dietary flags | calories | time |
     keywords tfidf | keywords_name tfidf | rating].

    Returns:
        (combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
         tfidf_vectorizer_keywords_name, category_dummies, scaler)
    """
    # Ingredients use bigrams so multi-word ingredients ("olive oil") match.
    tfidf_vectorizer_ingredients = TfidfVectorizer(
        stop_words='english',
        max_features=5000,
        ngram_range=(1, 2),
        min_df=1
    )

    # List-valued columns are joined into plain text before vectorising.
    ingredients_text = df['RecipeIngredientParts'].apply(lambda x: ' '.join(x) if x else '')
    tfidf_matrix_ingredients = tfidf_vectorizer_ingredients.fit_transform(ingredients_text)

    tfidf_vectorizer_keywords = TfidfVectorizer(stop_words='english', max_features=3000)
    tfidf_vectorizer_keywords_name = TfidfVectorizer(stop_words='english', max_features=3000)

    keywords_text = df['Keywords'].apply(lambda x: ' '.join(x) if x else '')
    keywords_name_text = df['keywords_name'].apply(lambda x: ' '.join(x) if x else '')

    tfidf_matrix_keywords = tfidf_vectorizer_keywords.fit_transform(keywords_text)
    tfidf_matrix_keywords_name = tfidf_vectorizer_keywords_name.fit_transform(keywords_name_text)

    # One-hot recipe category; the dummies frame is also returned so query
    # vectors can locate the right column by name.
    category_dummies = pd.get_dummies(df['RecipeCategory'])
    category_matrix = category_dummies.values

    dietary_columns = ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                       'is_low carb', 'is_keto', 'is_paleo']
    dietary_matrix = df[dietary_columns].values

    scaler = MinMaxScaler()
    # NOTE(review): the SAME scaler instance is re-fit three times below, so
    # the returned `scaler` ends up fit on AggregatedRating only. Downstream
    # create_query_vector uses it to transform calories/time, which therefore
    # get rating-range scaling — confirm whether separate scalers were intended.
    calories_matrix = scaler.fit_transform(df[['Calories']].values)
    time_matrix = scaler.fit_transform(df[['TotalTime_minutes']].values)
    rating_matrix = scaler.fit_transform(df[['AggregatedRating']].values)

    # Each section is pre-multiplied by its weight so cosine similarity on the
    # combined matrix reflects the configured feature importances.
    combined_matrix = hstack([
        tfidf_matrix_ingredients * feature_weights['ingredients'],
        category_matrix * feature_weights['category'],
        dietary_matrix * feature_weights['dietary'],
        calories_matrix * feature_weights['calories'],
        time_matrix * feature_weights['time'],
        tfidf_matrix_keywords * feature_weights['keywords'],
        tfidf_matrix_keywords_name * feature_weights['keywords_name'],
        rating_matrix * 0.05  # Small weight for ratings in base similarity
    ])

    return (combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
            tfidf_vectorizer_keywords_name, category_dummies, scaler)
55
+
56
def create_query_vector(combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
                        tfidf_vectorizer_keywords_name, category_dummies, scaler, feature_weights, **kwargs):
    """
    Create a query vector aligned with the combined feature matrix.

    Section layout (must mirror create_feature_matrices):
    [ingredients tfidf | category dummies | dietary flags | calories | time |
     keywords tfidf | keywords_name tfidf | rating].

    Bug fix: offsets now advance by each section's FULL width even when the
    corresponding query field is absent. Previously a query without, say,
    ingredients wrote the category one-hot into the ingredients columns,
    misaligning every later section against the combined matrix.

    Keyword args: ingredients (list[str]), category (str),
    dietary_preference (str), calories (number), time (number),
    keywords (list[str]), keywords_name (list[str]).

    Returns:
        np.ndarray of shape (1, combined_matrix.shape[1]).
    """
    query_vector = np.zeros((1, combined_matrix.shape[1]))
    pos = 0

    # --- ingredients tfidf section ---
    n_ingredients = len(tfidf_vectorizer_ingredients.vocabulary_)
    if kwargs.get('ingredients'):
        ingredients_query = tfidf_vectorizer_ingredients.transform([' '.join(kwargs['ingredients'])])
        query_vector[:, pos:pos + n_ingredients] = ingredients_query.toarray() * feature_weights['ingredients']
    pos += n_ingredients  # always advance so later sections stay aligned

    # --- category one-hot section ---
    if kwargs.get('category') and kwargs['category'] in category_dummies.columns:
        category_index = category_dummies.columns.get_loc(kwargs['category'])
        query_vector[0, pos + category_index] = feature_weights['category']
    pos += category_dummies.shape[1]

    # --- dietary flags section ---
    dietary_columns = ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                       'is_low carb', 'is_keto', 'is_paleo']
    if kwargs.get('dietary_preference') in dietary_columns:
        dietary_index = dietary_columns.index(kwargs['dietary_preference'])
        query_vector[0, pos + dietary_index] = feature_weights['dietary']
    pos += len(dietary_columns)

    # --- calories and time (one scaled column each) ---
    # NOTE(review): `scaler` comes from create_feature_matrices, where it was
    # last fit on AggregatedRating — confirm the intended scaling; kept as-is
    # for compatibility with the persisted artefacts.
    calories_vector = np.zeros((1, 1))
    if kwargs.get('calories'):
        calories_vector[0, 0] = kwargs['calories']
    query_vector[:, pos:pos + 1] = scaler.transform(calories_vector) * feature_weights['calories']
    pos += 1

    time_vector = np.zeros((1, 1))
    if kwargs.get('time'):
        time_vector[0, 0] = kwargs['time']
    query_vector[:, pos:pos + 1] = scaler.transform(time_vector) * feature_weights['time']
    pos += 1

    # --- keywords tfidf section ---
    n_keywords = len(tfidf_vectorizer_keywords.vocabulary_)
    if kwargs.get('keywords'):
        keywords_query = tfidf_vectorizer_keywords.transform([' '.join(kwargs['keywords'])])
        query_vector[:, pos:pos + n_keywords] = keywords_query.toarray() * feature_weights['keywords']
    pos += n_keywords  # advance unconditionally so keywords_name stays aligned

    # --- keywords_name tfidf section (last populated section; the trailing
    # rating column is intentionally left at zero for queries) ---
    if kwargs.get('keywords_name'):
        keywords_name_query = tfidf_vectorizer_keywords_name.transform([' '.join(kwargs['keywords_name'])])
        n_kn = keywords_name_query.shape[1]
        query_vector[:, pos:pos + n_kn] = keywords_name_query.toarray() * feature_weights['keywords_name']

    return query_vector
app/utils/recommendation_utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from app.models.recipe import Recipe
3
+ from app.utils.feature_engineering import create_query_vector
4
+ from app.utils.similarity_calculation import calculate_weighted_similarity
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
async def get_top_recommendations(df, combined_matrix, tfidf_vectorizer_ingredients,
                                  tfidf_vectorizer_keywords, tfidf_vectorizer_keywords_name,
                                  category_dummies, scaler, feature_weights, image_search_service,
                                  category=None, dietary_preference=None, ingredients=None,
                                  calories=None, time=None, keywords=None, keywords_name=None, top_n=5):
    """Return up to `top_n` Recipe objects ranked by weighted similarity.

    Builds a query vector from the optional filter arguments, scores every
    recipe in `combined_matrix` against it, optionally hard-filters by
    category, then enriches each selected row with image URLs fetched via
    `image_search_service` (used here as an async context manager).
    """
    logger.info(f"Starting recommendation process for category: {category}, dietary_preference: {dietary_preference}")

    query_vector = create_query_vector(combined_matrix, tfidf_vectorizer_ingredients,
                                       tfidf_vectorizer_keywords, tfidf_vectorizer_keywords_name,
                                       category_dummies, scaler, feature_weights,
                                       category=category, dietary_preference=dietary_preference,
                                       ingredients=ingredients, calories=calories, time=time,
                                       keywords=keywords, keywords_name=keywords_name)

    similarity_scores = calculate_weighted_similarity(query_vector, combined_matrix, df, calories, time)

    # Hard filter: zero out scores for rows outside the requested category.
    if category:
        similarity_scores *= (df['RecipeCategory'] == category)

    # Over-fetch 3x candidates so the per-row filtering below can drop some
    # without leaving the result short.
    top_indices = similarity_scores.argsort()[-top_n*3:][::-1]
    logger.info(f"Found {len(top_indices)} potential recommendations")

    results = []
    async with image_search_service as image_service:
        for idx in top_indices:
            if len(results) >= top_n:
                break

            recipe = df.iloc[idx]

            # Defensive re-check of the category filter on the candidate row.
            if category and recipe['RecipeCategory'] != category:
                continue

            # Image lookup is best-effort: failures degrade to an empty list.
            try:
                image_urls = await image_service.search_recipe_images(recipe['Name'], recipe['Images'], 3)
            except Exception as e:
                logger.error(f"Error searching images for {recipe['Name']}: {str(e)}")
                image_urls = []

            results.append(Recipe(
                RecipeId=int(recipe['RecipeId']),
                Name=recipe['Name'],
                RecipeCategory=recipe['RecipeCategory'],
                RecipeIngredientParts=recipe['RecipeIngredientParts'],
                Keywords=recipe['Keywords'],
                keywords_name=recipe['keywords_name'],
                Calories=float(recipe['Calories']),
                TotalTime_minutes=int(recipe['TotalTime_minutes']),
                AggregatedRating=float(recipe['AggregatedRating']),
                ReviewCount=int(recipe['ReviewCount']),
                Description=recipe['Description'],
                RecipeIngredientQuantities=recipe['RecipeIngredientQuantities'],
                RecipeInstructions=recipe['RecipeInstructions'],
                Images=image_urls,
                Similarity=float(similarity_scores[idx])
            ))

    logger.info(f"Returning {len(results)} recommendations")
    return results[:top_n]
app/utils/scrapers/__pycache__/allrecipes_scraper.cpython-312.pyc ADDED
Binary file (3.03 kB). View file
 
app/utils/scrapers/__pycache__/base_scraper.cpython-312.pyc ADDED
Binary file (3 kB). View file
 
app/utils/scrapers/__pycache__/food_network_scraper.cpython-312.pyc ADDED
Binary file (2.74 kB). View file
 
app/utils/scrapers/__pycache__/fooddotcom_scraper.cpython-312.pyc ADDED
Binary file (3.87 kB). View file
 
app/utils/scrapers/__pycache__/google_scraper.cpython-312.pyc ADDED
Binary file (3.11 kB). View file
 
app/utils/scrapers/__pycache__/wikimedia_scraper.cpython-312.pyc ADDED
Binary file (2.92 kB). View file
 
app/utils/scrapers/allrecipes_scraper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ from urllib.parse import quote
3
+ from typing import List
4
+ from .base_scraper import BaseScraper
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class AllRecipesScraper(BaseScraper):
    """Scrapes candidate recipe images from allrecipes.com search results."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to `num_images` verified image URLs for `recipe_name`."""
        url = f"https://www.allrecipes.com/search?q={quote(recipe_name)}"

        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                page = await response.text()

            soup = BeautifulSoup(page, 'html.parser')
            banned = ('icon', 'logo', 'advertisement')
            candidates = set()
            for tag in soup.find_all('img'):
                src = tag.get('src') or tag.get('data-src')
                if src and all(word not in src.lower() for word in banned):
                    candidates.add(src)

            verified = []
            for candidate in candidates:
                if len(verified) >= num_images:
                    break
                if await self.verify_image_url(candidate):
                    verified.append(candidate)
            return verified
        except Exception as e:
            logger.error(f"AllRecipes scraping error: {str(e)}")
            return []
app/utils/scrapers/base_scraper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import aiohttp
3
+ from abc import ABC, abstractmethod
4
+ from typing import List
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class BaseScraper(ABC):
    """Common plumbing for the async image scrapers.

    Subclasses implement search_images(); the shared aiohttp session is
    injected externally (by ImageSearchService) before any request is made.
    """

    def __init__(self):
        # Rotated per-request to look like ordinary browser traffic.
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15',
        ]
        # aiohttp.ClientSession assigned by the owning service before use.
        self.session = None

    async def get_headers(self):
        """Return browser-like request headers with a randomly chosen User-Agent."""
        return {
            'User-Agent': random.choice(self.user_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.google.com/',
        }

    @abstractmethod
    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to `num_images` verified image URLs for `recipe_name`."""

    async def verify_image_url(self, url: str) -> bool:
        """HEAD-check that `url` resolves to a real (non-placeholder) image."""
        try:
            async with self.session.head(url, allow_redirects=True, timeout=60) as response:
                content_type = response.headers.get('content-type', '')
                return (response.status == 200 and
                        'image' in content_type and
                        not any(x in url.lower() for x in ['placeholder', 'default', 'missing']))
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # asyncio.CancelledError and could therefore block task cancellation.
            return False
app/utils/scrapers/food_network_scraper.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ from urllib.parse import quote
3
+ from .base_scraper import BaseScraper
4
+ from typing import List
5
+
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class FoodNetworkScraper(BaseScraper):
    """Scrapes candidate recipe images from foodnetwork.com search pages."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to `num_images` verified image URLs for `recipe_name`."""
        url = f"https://www.foodnetwork.com/search/{quote(recipe_name)}-"

        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                page = await response.text()

            soup = BeautifulSoup(page, 'html.parser')
            # Food Network lazy-loads images via data-src; skip tiny thumbnails.
            candidates = set()
            for tag in soup.find_all('img', {'data-src': True}):
                src = tag.get('data-src')
                if src and 'thumbnail' not in src.lower():
                    candidates.add(src)

            verified = []
            for candidate in candidates:
                if len(verified) >= num_images:
                    break
                if await self.verify_image_url(candidate):
                    verified.append(candidate)
            return verified
        except Exception as e:
            logger.error(f"Food Network scraping error: {str(e)}")
            return []
app/utils/scrapers/fooddotcom_scraper.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import re
3
+ from typing import List
4
+ from urllib.parse import quote
5
+ from .base_scraper import BaseScraper
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class FoodDotComScraper(BaseScraper):
    """Scrapes candidate recipe images from food.com search result pages."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to `num_images` verified image URLs for `recipe_name`.

        Prefers images inside recipe-card markup (both lazy-loaded data-src
        and plain src), falling back to `recipe-image`-classed tags; rewrites
        size tokens in URLs to request larger renditions.
        """
        search_query = quote(recipe_name)
        url = f"https://www.food.com/search/{search_query}?pn=1"

        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')
                images = set()

                # Look for recipe cards which usually contain the main images
                recipe_cards = soup.find_all('div', {'class': 'recipe-card'})
                for card in recipe_cards:
                    # Check for lazy-loaded images
                    img_tags = card.find_all('img', {'data-src': True})
                    for img in img_tags:
                        src = img.get('data-src')
                        if src:
                            # Food.com often uses different image sizes, try to get the largest
                            # Replace size parameters in URL to get larger images
                            src = re.sub(r's\d+-c', 's800-c', src)
                            images.add(src)

                    # Check for regular images
                    img_tags = card.find_all('img', {'src': True})
                    for img in img_tags:
                        src = img.get('src')
                        if src and not any(x in src.lower() for x in ['icon', 'logo', 'advertisement']):
                            src = re.sub(r's\d+-c', 's800-c', src)
                            images.add(src)

                # If no recipe cards found, try finding images in the main content
                if not images:
                    img_tags = soup.find_all('img', {'class': 'recipe-image'})
                    for img in img_tags:
                        src = img.get('src') or img.get('data-src')
                        if src:
                            src = re.sub(r's\d+-c', 's800-c', src)
                            images.add(src)

                # Keep only URLs that HEAD-check as real images, up to the cap.
                valid_images = []
                for img_url in images:
                    if len(valid_images) >= num_images:
                        break
                    if await self.verify_image_url(img_url):
                        valid_images.append(img_url)

                return valid_images

        except Exception as e:
            logger.error(f"Food.com scraping error: {str(e)}")
            return []
app/utils/scrapers/google_scraper.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import re
3
+ from urllib.parse import quote, unquote
4
+ from .base_scraper import BaseScraper
5
+ import logging
6
+ from typing import List
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class GoogleScraper(BaseScraper):
    """Scrapes candidate image URLs from Google Images result pages."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to `num_images` verified image URLs for `recipe_name`."""
        query = f"{recipe_name} recipe food"
        url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"

        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                page = await response.text()

            soup = BeautifulSoup(page, 'html.parser')
            candidates = set()
            # Image URLs live in JSON-ish blobs inside AF_initDataCallback scripts.
            for script in soup.find_all('script'):
                body = script.string
                if body and 'AF_initDataCallback' in body:
                    for match in re.findall(r'(https?://\S+\.(?:jpg|jpeg|png))', body):
                        candidates.add(unquote(match))

            # Verify URLs and keep only the ones that resolve to real images.
            verified = []
            for candidate in candidates:
                if len(verified) >= num_images:
                    break
                if await self.verify_image_url(candidate):
                    verified.append(candidate)
            return verified
        except Exception as e:
            logger.error(f"Google scraping error: {str(e)}")
            return []
app/utils/scrapers/wikimedia_scraper.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urllib.parse import quote
2
+ from typing import List
3
+ from .base_scraper import BaseScraper
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class WikimediaScraper(BaseScraper):
    """Searches Wikimedia Commons (File: namespace) for recipe photographs."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to ``num_images`` verified Commons image URLs.

        Uses the MediaWiki search API (``list=search``) restricted to the
        File: namespace, then builds a stable ``Special:FilePath`` URL for
        each hit. Returns an empty list on any HTTP or parsing failure.
        """
        url = "https://commons.wikimedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            # BUG FIX: the query string must be passed un-encoded here.
            # aiohttp percent-encodes `params` itself, so pre-quoting the
            # recipe name double-encoded it (spaces became "%2520"),
            # silently breaking searches for multi-word recipe names.
            "srsearch": f"{recipe_name} food",
            "srnamespace": "6",  # File: namespace only
            "srlimit": num_images,
        }

        try:
            async with self.session.get(url, params=params, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []

                data = await response.json()
                images = set()

                for item in data.get('query', {}).get('search', []):
                    title = item.get('title', '')
                    if title.startswith('File:'):
                        # Special:FilePath redirects to the raw file; here we
                        # ARE building the URL by hand, so quote() is correct.
                        file_url = f"https://commons.wikimedia.org/wiki/Special:FilePath/{quote(title[5:])}"
                        images.add(file_url)

                # Verify URLs and keep only the first num_images reachable ones.
                valid_images = []
                for img_url in images:
                    if len(valid_images) >= num_images:
                        break
                    if await self.verify_image_url(img_url):
                        valid_images.append(img_url)

                return valid_images
        except Exception as e:
            # Deliberate broad catch: scraping is best-effort, so log and
            # fall back to an empty result instead of propagating.
            logger.error(f"Wikimedia scraping error: {str(e)}")
            return []
app/utils/similarity_calculation.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.metrics.pairwise import cosine_similarity
2
+ import numpy as np
3
+
4
def calculate_weighted_similarity(query_vector, combined_matrix, df, target_calories=None, target_time=None):
    """
    Calculate weighted similarity scores between the query vector and the
    combined feature matrix.

    Parameters
    ----------
    query_vector : array-like of shape (1, n_features)
        Feature vector for the query recipe.
    combined_matrix : array-like or sparse matrix of shape (n_recipes, n_features)
        Precomputed feature matrix for all recipes.
    df : pandas.DataFrame
        Recipe frame aligned row-for-row with ``combined_matrix``; must
        contain 'Calories' and 'TotalTime_minutes' columns.
    target_calories, target_time : numeric, optional
        When given, recipes are penalized proportionally to their relative
        distance from the target.

    Returns
    -------
    numpy.ndarray of shape (n_recipes,)
        Cosine similarity scaled by the (0..1) penalty factors.
    """
    base_similarity = cosine_similarity(query_vector, combined_matrix).flatten()

    penalties = np.ones_like(base_similarity)

    if target_calories is not None:
        max_calories = df['Calories'].max()
        if max_calories > 0:  # guard against a degenerate all-zero column
            calorie_diff = np.abs(df['Calories'].values - target_calories)
            # BUG FIX: clip to [0, 1] — a target farther from a value than the
            # column max made 1 - diff/max negative, flipping the similarity
            # sign and pushing the best matches to the bottom of the ranking.
            penalties *= np.clip(1 - calorie_diff / max_calories, 0.0, 1.0)

    if target_time is not None:
        max_time = df['TotalTime_minutes'].max()
        if max_time > 0:  # guard against a degenerate all-zero column
            time_diff = np.abs(df['TotalTime_minutes'].values - target_time)
            penalties *= np.clip(1 - time_diff / max_time, 0.0, 1.0)

    return base_similarity * penalties
config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+
3
class Config:
    """Application configuration constants."""

    # Directory containing this config module; the dataset sits next to it.
    _BASE_DIR = os.path.dirname(__file__)

    # Absolute-ish path to the raw recipe dataset CSV.
    CSV_FILE_PATH = os.path.join(_BASE_DIR, 'recipe_dataset.csv')

    # Directory holding precomputed model artifacts (relative to the CWD).
    PRECOMPUTED_DIR = 'precomputed'
6
+
form_data.json ADDED
The diff for this file is too large to render. See raw diff
 
precomputed/category_dummies.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16cabab6b55b1592c9645bd51bae137af0a740b165c983be6d2e70a5a4ce010
3
+ size 165342541
precomputed/combined_matrix.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6192a266f00edac60dc58ae9414d925313df00f32d51fa063816b69931f23ac
3
+ size 146356536
precomputed/df.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d96d26fed0f3b971dc44a540e4a55260e1ce3d966f1dd50b22e98bf8e08dafd9
3
+ size 799635866
precomputed/scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee2fe48286554095f1c37aaff1dd00cf264cce4ce89a6aadc04de24a421a365
3
+ size 719
precomputed/tfidf_vectorizer_ingredients.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7450c31c3bca4362d8f95282756baf6a97dc0b57c9c1b06328e06212a5bc9789
3
+ size 208006
precomputed/tfidf_vectorizer_keywords.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c80bcf57cc31d546f0362cb4d54d0eff63ce903d026a45aaa287322afdeca4
3
+ size 15243
precomputed/tfidf_vectorizer_keywords_name.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eda812093fd905b029de6c59f3db10586d09570d8508b677cceb4f9a58bcadf
3
+ size 109419
recipe_dataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076a78cbd94ba5b8b0cba19591b0d96edcedd8d020d0143aae63b81eda3d1e91
3
+ size 713412559
run.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# run.py
#
# Development entry point for the recipe API: builds the Flask app via the
# application factory and enables CORS for the frontend origin.
import os

from app import create_app
from flask import Flask, jsonify
from flask_cors import CORS
import json

app = create_app()

# Allow the frontend to call any route. The origin is overridable with the
# CORS_ORIGIN environment variable so deployments don't need a code change;
# the default keeps the original local dev-server behavior.
CORS(app, resources={r"/*": {"origins": os.environ.get("CORS_ORIGIN", "http://localhost:3000")}})

if __name__ == '__main__':
    # Debug mode defaults on (matching the original behavior) but can be
    # disabled with FLASK_DEBUG=0. Never run the Werkzeug debugger in
    # production — it allows arbitrary code execution via the console.
    app.run(debug=os.environ.get("FLASK_DEBUG", "1") != "0")