Spaces:
Running
Running
Upload 48 files
Browse files- .gitattributes +1 -0
- app/__init__.py +18 -0
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/api/__pycache__/routes.cpython-312.pyc +0 -0
- app/api/routes.py +151 -0
- app/main.py +3 -0
- app/models/__pycache__/recipe.cpython-312.pyc +0 -0
- app/models/recipe.py +20 -0
- app/services/__pycache__/extraction.cpython-312.pyc +0 -0
- app/services/__pycache__/image_query.cpython-312.pyc +0 -0
- app/services/__pycache__/image_search.cpython-312.pyc +0 -0
- app/services/__pycache__/recommendation.cpython-312.pyc +0 -0
- app/services/extraction.py +701 -0
- app/services/image_query.py +50 -0
- app/services/image_search.py +126 -0
- app/services/recommendation.py +28 -0
- app/utils/__pycache__/data_loading.cpython-312.pyc +0 -0
- app/utils/__pycache__/data_preprocessing.cpython-312.pyc +0 -0
- app/utils/__pycache__/feature_engineering.cpython-312.pyc +0 -0
- app/utils/__pycache__/recommendation_utils.cpython-312.pyc +0 -0
- app/utils/__pycache__/similarity_calculation.cpython-312.pyc +0 -0
- app/utils/data_loading.py +47 -0
- app/utils/data_preprocessing.py +108 -0
- app/utils/feature_engineering.py +118 -0
- app/utils/recommendation_utils.py +66 -0
- app/utils/scrapers/__pycache__/allrecipes_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/__pycache__/base_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/__pycache__/food_network_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/__pycache__/fooddotcom_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/__pycache__/google_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/__pycache__/wikimedia_scraper.cpython-312.pyc +0 -0
- app/utils/scrapers/allrecipes_scraper.py +38 -0
- app/utils/scrapers/base_scraper.py +38 -0
- app/utils/scrapers/food_network_scraper.py +39 -0
- app/utils/scrapers/fooddotcom_scraper.py +65 -0
- app/utils/scrapers/google_scraper.py +42 -0
- app/utils/scrapers/wikimedia_scraper.py +45 -0
- app/utils/similarity_calculation.py +22 -0
- config.py +6 -0
- form_data.json +0 -0
- precomputed/category_dummies.joblib +3 -0
- precomputed/combined_matrix.npz +3 -0
- precomputed/df.joblib +3 -0
- precomputed/scaler.joblib +3 -0
- precomputed/tfidf_vectorizer_ingredients.joblib +3 -0
- precomputed/tfidf_vectorizer_keywords.joblib +3 -0
- precomputed/tfidf_vectorizer_keywords_name.joblib +3 -0
- recipe_dataset.csv +3 -0
- run.py +11 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
recipe_dataset.csv filter=lfs diff=lfs merge=lfs -text
|
app/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask
|
| 2 |
+
from app.api.routes import api_bp
|
| 3 |
+
from app.services.recommendation import FlexibleRecipeRecommendationSystem
|
| 4 |
+
from config import Config
|
| 5 |
+
|
| 6 |
+
def create_app(config_object=Config):
    """Application factory for the recipe-recommendation service.

    Builds the Flask app, loads configuration from *config_object*,
    constructs the recommendation engine once at startup, and registers
    the API blueprint.

    Args:
        config_object: configuration class/object consumed by
            ``app.config.from_object`` (defaults to ``Config``).

    Returns:
        The fully configured :class:`flask.Flask` instance.
    """
    flask_app = Flask(__name__)
    flask_app.config.from_object(config_object)

    # Build the recommendation engine eagerly, from the dataset path and
    # the directory of precomputed artifacts declared in configuration.
    flask_app.recommendation_system = FlexibleRecipeRecommendationSystem(
        flask_app.config['CSV_FILE_PATH'],
        flask_app.config['PRECOMPUTED_DIR'],
    )

    flask_app.register_blueprint(api_bp)

    return flask_app
|
app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (918 Bytes). View file
|
|
|
app/api/__pycache__/routes.cpython-312.pyc
ADDED
|
Binary file (6.45 kB). View file
|
|
|
app/api/routes.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Blueprint, Response, request, jsonify, current_app
|
| 2 |
+
from app.models.recipe import Recipe
|
| 3 |
+
import json
|
| 4 |
+
import asyncio
|
| 5 |
+
from app.services import extraction
|
| 6 |
+
from app.services import image_query
|
| 7 |
+
|
| 8 |
+
api_bp = Blueprint('api', __name__)

@api_bp.route('/form-data', methods=['GET'])
def get_form_data():
    """Serve the static form configuration stored in ``form_data.json``.

    Returns:
        JSON response with the parsed contents of the file.
    """
    # Fix: specify the encoding explicitly — the previous implicit
    # platform-default encoding could mis-decode the file on some systems.
    with open('form_data.json', 'r', encoding='utf-8') as file:
        data = json.load(file)
    return jsonify(data)
|
| 15 |
+
|
| 16 |
+
@api_bp.route('/recommend', methods=['POST'])
async def recommend_recipes():  # async: the recommendation system is awaited below
    """Recommend recipes from explicit filter fields in the JSON body.

    Expects a JSON object with optional keys: ``category``,
    ``dietary_preference``, ``ingredients``, ``calories``, ``time``,
    ``keywords``, ``keywords_name``.

    Returns:
        A JSON list of recipe dicts, or ``{"error": ...}`` with HTTP 400
        when the body is missing/invalid or calories/time are not integers.
    """
    # Fix: request.json could be None (missing or non-JSON body), which
    # previously crashed on .get() with a 500. silent=True lets us answer
    # with a clean 400 instead.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No data provided"}), 400

    category = data.get('category')
    dietary_preference = data.get('dietary_preference')
    ingredients = data.get('ingredients', [])
    calories = data.get('calories')
    time = data.get('time')
    keywords = data.get('keywords', [])
    keywords_name = data.get('keywords_name', [])

    try:
        if calories is not None:
            calories = int(calories)
        if time is not None:
            time = int(time)
    # Fix: also catch TypeError (e.g. a list/dict in the JSON), matching
    # the error handling of the other routes in this module.
    except (ValueError, TypeError):
        return jsonify({"error": "Calories and time must be integers if provided"}), 400

    # Use await to call the async function
    recommendations = await current_app.recommendation_system.get_recommendations(
        category=category,
        dietary_preference=dietary_preference,
        ingredients=ingredients,
        calories=calories,
        time=time,
        keywords=keywords,
        keywords_name=keywords_name
    )

    return jsonify([vars(recipe) for recipe in recommendations])
|
| 47 |
+
|
| 48 |
+
@api_bp.route('/extract-recipe-attributes', methods=['POST'])
async def recommend_recipes2():
    """Extract recipe attributes from free text, then recommend recipes.

    Pipeline: raw text -> structured attributes (extraction service) ->
    recommendations (recommendation system). Returns a JSON list of
    recipe dicts, or ``{"error": ...}`` with HTTP 400/500.
    """
    try:
        payload = request.get_json()
        if not payload:
            return jsonify({"error": "No data provided"}), 400

        search_text = payload.get('text')
        if not search_text:
            return jsonify({"error": "No search text provided"}), 400

        # Pull structured recipe attributes out of the free-form text.
        attributes = extraction.extract_recipe_attributes(search_text)

        # The extraction service signals failure via an 'error' key.
        if 'error' in attributes:
            return jsonify(attributes), 500

        calories = attributes.get('calories', None)
        time = attributes.get('time', None)

        # Normalize calories/time to ints; empty strings become None.
        try:
            calories = int(calories) if calories else None
            time = int(time) if time else None
        except (ValueError, TypeError):
            return jsonify({"error": "Invalid calories or time value"}), 400

        recommendations = await current_app.recommendation_system.get_recommendations(
            category=attributes.get('category', ''),
            ingredients=[],  # Adjust if you plan to add ingredients in the extraction function
            calories=calories,
            time=time,
            keywords=attributes.get('keywords', []),
            keywords_name=attributes.get('keywords_name', [])
        )

        # vars() turns each Recipe dataclass into a JSON-serializable dict.
        return jsonify([vars(recipe) for recipe in recommendations])

    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
| 97 |
+
|
| 98 |
+
# searchImage
|
| 99 |
+
@api_bp.route('/analyze-food-image', methods=['POST'])
async def handle_analyze_food_image():
    """Analyze an uploaded food image and return recipe recommendations.

    Pipeline: uploaded image -> textual description (image_query service)
    -> structured attributes (extraction service) -> recommendations
    (recommendation system). Returns a JSON list of recipe dicts, or
    ``{"error": ...}`` with HTTP 400/500.
    """
    try:
        # Reject requests that carry no multipart part under the 'image' key.
        if 'image' not in request.files:
            return jsonify({"error": "No image file provided"}), 400

        file = request.files['image']

        # Browsers may submit an empty part when no file was selected.
        if file.filename == '':
            return jsonify({"error": "No selected file"}), 400

        # Call the analyze function with the file
        description = image_query.analyze_food_image(file)

        # Extract recipe attributes
        extracted_info = extraction.extract_recipe_attributes(description)  # Call the extraction function

        # Check if extraction was successful — the extraction service
        # signals failure via an 'error' key in its result.
        if 'error' in extracted_info:
            return jsonify(extracted_info), 500

        # Access the extracted attributes
        category = extracted_info.get('category', '')
        calories = extracted_info.get('calories', None)
        time = extracted_info.get('time', None)
        keywords = extracted_info.get('keywords', [])
        keywords_name = extracted_info.get('keywords_name', [])

        # Convert calories and time to integers if they exist
        # (empty strings and None both map to None).
        try:
            calories = int(calories) if calories else None
            time = int(time) if time else None
        except (ValueError, TypeError):
            return jsonify({"error": "Invalid calories or time value"}), 400

        # Get recommendations using the recommendation system
        recommendations = await current_app.recommendation_system.get_recommendations(
            category=category,
            ingredients=[],  # Adjust if you plan to add ingredients in the extraction function
            calories=calories,
            time=time,
            keywords=keywords,
            keywords_name=keywords_name
        )

        # Convert recommendations to JSON-serializable format
        # (vars() works because Recipe is a plain dataclass).
        recipe_list = [vars(recipe) for recipe in recommendations]

        return jsonify(recipe_list)

    except Exception as e:
        # NOTE(review): broad catch at the route boundary returns the raw
        # exception text to the client; consider logging it server-side too.
        return jsonify({"error": str(e)}), 500
|
| 151 |
+
|
app/main.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app import create_app

# Module-level app object built via the application factory, so WSGI/ASGI
# servers can import it (e.g. "app.main:app").
app = create_app()
|
app/models/__pycache__/recipe.cpython-312.pyc
ADDED
|
Binary file (1 kB). View file
|
|
|
app/models/recipe.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from typing import List
|
| 3 |
+
|
| 4 |
+
@dataclass
class Recipe:
    """A single recipe record returned by the recommendation system.

    Field names are PascalCase — presumably mirroring the dataset's column
    names (TODO confirm against recipe_dataset.csv). Instances are
    serialized to JSON with ``vars()`` by the API routes.
    """
    RecipeId: int
    Name: str
    RecipeCategory: str
    RecipeIngredientParts: List[str]
    Keywords: List[str]
    keywords_name: List[str]  # lowercase on purpose? matches the API's 'keywords_name' field
    Calories: float
    TotalTime_minutes: int
    AggregatedRating: float
    ReviewCount: int
    Description: str
    RecipeIngredientQuantities: List[str]
    RecipeInstructions: List[str]
    Images: List[str]
    Similarity: float  # similarity score assigned by the recommender
|
app/services/__pycache__/extraction.cpython-312.pyc
ADDED
|
Binary file (19.5 kB). View file
|
|
|
app/services/__pycache__/image_query.cpython-312.pyc
ADDED
|
Binary file (2.38 kB). View file
|
|
|
app/services/__pycache__/image_search.cpython-312.pyc
ADDED
|
Binary file (7.84 kB). View file
|
|
|
app/services/__pycache__/recommendation.cpython-312.pyc
ADDED
|
Binary file (2.09 kB). View file
|
|
|
app/services/extraction.py
ADDED
|
@@ -0,0 +1,701 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import json
|
| 3 |
+
from difflib import get_close_matches
|
| 4 |
+
import os
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from difflib import SequenceMatcher
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 10 |
+
|
| 11 |
+
# Define categories from dataset
|
| 12 |
+
RECIPE_CATEGORIES = [
|
| 13 |
+
"frozen desserts",
|
| 14 |
+
"chicken breast",
|
| 15 |
+
"beverages",
|
| 16 |
+
"soy/tofu",
|
| 17 |
+
"vegetable",
|
| 18 |
+
"pie",
|
| 19 |
+
"chicken",
|
| 20 |
+
"dessert",
|
| 21 |
+
"southwestern u.s.",
|
| 22 |
+
"sauces",
|
| 23 |
+
"stew",
|
| 24 |
+
"black beans",
|
| 25 |
+
"< 60 mins",
|
| 26 |
+
"lactose free",
|
| 27 |
+
"yeast breads",
|
| 28 |
+
"whole chicken",
|
| 29 |
+
"cheesecake",
|
| 30 |
+
"free of...",
|
| 31 |
+
"brazilian",
|
| 32 |
+
"breakfast",
|
| 33 |
+
"breads",
|
| 34 |
+
"bar cookie",
|
| 35 |
+
"brown rice",
|
| 36 |
+
"oranges",
|
| 37 |
+
"pork",
|
| 38 |
+
"low protein",
|
| 39 |
+
"asian",
|
| 40 |
+
"potato",
|
| 41 |
+
"cheese",
|
| 42 |
+
"halibut",
|
| 43 |
+
"meat",
|
| 44 |
+
"lamb/sheep",
|
| 45 |
+
"very low carbs",
|
| 46 |
+
"spaghetti",
|
| 47 |
+
"scones",
|
| 48 |
+
"drop cookies",
|
| 49 |
+
"lunch/snacks",
|
| 50 |
+
"beans",
|
| 51 |
+
"punch beverage",
|
| 52 |
+
"pineapple",
|
| 53 |
+
"quick breads",
|
| 54 |
+
"sourdough breads",
|
| 55 |
+
"curries",
|
| 56 |
+
"chicken livers",
|
| 57 |
+
"coconut",
|
| 58 |
+
"savory pies",
|
| 59 |
+
"poultry",
|
| 60 |
+
"steak",
|
| 61 |
+
"healthy",
|
| 62 |
+
"rice",
|
| 63 |
+
"apple",
|
| 64 |
+
"spreads",
|
| 65 |
+
"crab",
|
| 66 |
+
"jellies",
|
| 67 |
+
"pears",
|
| 68 |
+
"chowders",
|
| 69 |
+
"cauliflower",
|
| 70 |
+
"candy",
|
| 71 |
+
"chutneys",
|
| 72 |
+
"white rice",
|
| 73 |
+
"tex mex",
|
| 74 |
+
"bass",
|
| 75 |
+
"fruit",
|
| 76 |
+
"european",
|
| 77 |
+
"smoothies",
|
| 78 |
+
"manicotti",
|
| 79 |
+
"onions",
|
| 80 |
+
"new zealand",
|
| 81 |
+
"chicken thigh & leg",
|
| 82 |
+
"indonesian",
|
| 83 |
+
"greek",
|
| 84 |
+
"corn",
|
| 85 |
+
"lentil",
|
| 86 |
+
"long grain rice",
|
| 87 |
+
"southwest asia (middle east)",
|
| 88 |
+
"spanish",
|
| 89 |
+
"dutch",
|
| 90 |
+
"gelatin",
|
| 91 |
+
"tuna",
|
| 92 |
+
"citrus",
|
| 93 |
+
"berries",
|
| 94 |
+
"peppers",
|
| 95 |
+
"salad dressings",
|
| 96 |
+
"clear soup",
|
| 97 |
+
"mexican",
|
| 98 |
+
"raspberries",
|
| 99 |
+
"crawfish",
|
| 100 |
+
"beef organ meats",
|
| 101 |
+
"lobster",
|
| 102 |
+
"strawberry",
|
| 103 |
+
"shakes",
|
| 104 |
+
"short grain rice",
|
| 105 |
+
"< 15 mins",
|
| 106 |
+
"german",
|
| 107 |
+
"one dish meal",
|
| 108 |
+
"thai",
|
| 109 |
+
"cajun",
|
| 110 |
+
"russian",
|
| 111 |
+
"melons",
|
| 112 |
+
"swiss",
|
| 113 |
+
"papaya",
|
| 114 |
+
"veal",
|
| 115 |
+
"orange roughy",
|
| 116 |
+
"canadian",
|
| 117 |
+
"caribbean",
|
| 118 |
+
"mussels",
|
| 119 |
+
"medium grain rice",
|
| 120 |
+
"japanese",
|
| 121 |
+
"penne",
|
| 122 |
+
"elk",
|
| 123 |
+
"colombian",
|
| 124 |
+
"gumbo",
|
| 125 |
+
"roast beef",
|
| 126 |
+
"perch",
|
| 127 |
+
"vietnamese",
|
| 128 |
+
"rabbit",
|
| 129 |
+
"lebanese",
|
| 130 |
+
"turkish",
|
| 131 |
+
"kid friendly",
|
| 132 |
+
"whole turkey",
|
| 133 |
+
"chinese",
|
| 134 |
+
"grains",
|
| 135 |
+
"yam/sweet potato",
|
| 136 |
+
"meatloaf",
|
| 137 |
+
"trout",
|
| 138 |
+
"african",
|
| 139 |
+
"ham",
|
| 140 |
+
"goose",
|
| 141 |
+
"pasta shells",
|
| 142 |
+
"stocks",
|
| 143 |
+
"meatballs",
|
| 144 |
+
"whole duck",
|
| 145 |
+
"scandinavian",
|
| 146 |
+
"greens",
|
| 147 |
+
"catfish",
|
| 148 |
+
"duck breasts",
|
| 149 |
+
"polish",
|
| 150 |
+
"deer",
|
| 151 |
+
"wild game",
|
| 152 |
+
"pheasant",
|
| 153 |
+
"hungarian",
|
| 154 |
+
"no shell fish",
|
| 155 |
+
"collard greens",
|
| 156 |
+
"tilapia",
|
| 157 |
+
"quail",
|
| 158 |
+
"moroccan",
|
| 159 |
+
"squid",
|
| 160 |
+
"korean",
|
| 161 |
+
"plums",
|
| 162 |
+
"danish",
|
| 163 |
+
"creole",
|
| 164 |
+
"mahi mahi",
|
| 165 |
+
"tarts",
|
| 166 |
+
"hawaiian",
|
| 167 |
+
"austrian",
|
| 168 |
+
"moose",
|
| 169 |
+
"native american",
|
| 170 |
+
"swedish",
|
| 171 |
+
"norwegian",
|
| 172 |
+
"ethiopian",
|
| 173 |
+
"belgian",
|
| 174 |
+
"australian",
|
| 175 |
+
"bear",
|
| 176 |
+
"scottish",
|
| 177 |
+
"tempeh",
|
| 178 |
+
"cuban",
|
| 179 |
+
"spinach",
|
| 180 |
+
"turkey breasts",
|
| 181 |
+
"cantonese",
|
| 182 |
+
"tropical fruits",
|
| 183 |
+
"peanut butter",
|
| 184 |
+
"szechuan",
|
| 185 |
+
"portuguese",
|
| 186 |
+
"costa rican",
|
| 187 |
+
"duck",
|
| 188 |
+
"nuts",
|
| 189 |
+
"filipino",
|
| 190 |
+
"pot pie",
|
| 191 |
+
"polynesian",
|
| 192 |
+
"mango",
|
| 193 |
+
"cherries",
|
| 194 |
+
"egyptian",
|
| 195 |
+
"chard",
|
| 196 |
+
"lime",
|
| 197 |
+
"lemon",
|
| 198 |
+
"kiwifruit",
|
| 199 |
+
"whitefish",
|
| 200 |
+
"south american",
|
| 201 |
+
"malaysian",
|
| 202 |
+
"octopus",
|
| 203 |
+
"nigerian",
|
| 204 |
+
"south african",
|
| 205 |
+
"nepalese",
|
| 206 |
+
"palestinian",
|
| 207 |
+
"czech",
|
| 208 |
+
"avocado",
|
| 209 |
+
"iraqi",
|
| 210 |
+
"pakistani",
|
| 211 |
+
"chocolate chip cookies",
|
| 212 |
+
"finnish",
|
| 213 |
+
"puerto rican",
|
| 214 |
+
"cambodian",
|
| 215 |
+
"honduran",
|
| 216 |
+
"mongolian",
|
| 217 |
+
"peruvian",
|
| 218 |
+
"turkey gravy",
|
| 219 |
+
"somalian",
|
| 220 |
+
"ice cream",
|
| 221 |
+
"oatmeal",
|
| 222 |
+
"artichoke",
|
| 223 |
+
"indian",
|
| 224 |
+
"grapes",
|
| 225 |
+
"macaroni and cheese",
|
| 226 |
+
"mashed potatoes",
|
| 227 |
+
"pumpkin",
|
| 228 |
+
"guatemalan"
|
| 229 |
+
]
|
| 230 |
+
|
| 231 |
+
def find_closest_category(category):
    """Find the closest matching category from the dataset.

    Matching strategy, in order:
      1. exact case-insensitive match against RECIPE_CATEGORIES;
      2. if the input is a single word, exact match on that word;
      3. first category containing any word of the input as a substring;
      4. difflib closest match (cutoff 0.75), accepted only if its
         similarity ratio exceeds 0.8.

    Args:
        category: free-form category string (may be empty/None).

    Returns:
        The canonical category string from RECIPE_CATEGORIES, or "" when
        no sufficiently close match exists.
    """
    if not category:
        return ""

    # Fix: the lowered category list was rebuilt up to five times per call;
    # compute it once and index back into the canonical list.
    lowered = [c.lower() for c in RECIPE_CATEGORIES]
    query = category.lower()

    # 1. Exact (case-insensitive) match.
    if query in lowered:
        return RECIPE_CATEGORIES[lowered.index(query)]

    # 2. Single-word input that matches exactly (covers inputs that differ
    #    from their lone word only by surrounding whitespace).
    category_parts = query.split()
    if len(category_parts) == 1 and category_parts[0] in lowered:
        return RECIPE_CATEGORIES[lowered.index(category_parts[0])]

    # 3. Substring match on each word, first hit wins (list order matters).
    for part in category_parts:
        matches = [orig for orig, low in zip(RECIPE_CATEGORIES, lowered) if part in low]
        if matches:
            return matches[0]

    # 4. Fuzzy fallback: closest difflib match, but only accept it when the
    #    similarity score is high enough (> 0.8) to avoid bogus mappings.
    close = get_close_matches(query, lowered, n=1, cutoff=0.75)
    if close:
        closest_match = close[0]
        if SequenceMatcher(None, query, closest_match).ratio() > 0.8:
            return RECIPE_CATEGORIES[lowered.index(closest_match)]
        return ""

    # No match found at all.
    return ""
|
| 263 |
+
|
| 264 |
+
def extract_recipe_attributes(text):
    """Extract structured recipe attributes from free-form user text.

    Sends a few-shot prompt to the OpenAI chat API asking the model to pull
    out a recipe category, calorie target, cooking time and two keyword
    lists, then snaps the returned category onto the dataset categories via
    find_closest_category().  When no category matches, a hand-maintained
    table of niche inputs (coffee, kombucha, air fryer, ...) injects extra
    context keywords instead.

    Args:
        text: Raw user request, e.g. "chicken soup with 200 calories".

    Returns:
        dict with keys "category", "calories", "time", "keywords" and
        "keywords_name", or {"error": ..., "output": ...} when the model's
        reply is not valid JSON.
    """
    # Few-shot prompt: the examples teach the model both the JSON schema and
    # the convention that uncommon dishes get an empty category plus
    # descriptive keywords.  Doubled braces ({{ }}) are literal braces
    # inside this f-string.
    messages = [
        {"role": "system", "content": "You are an assistant that extracts recipe attributes from user input. If the input contains an uncommon or unrecognized category, add relevant general keywords based on common culinary types, such as 'beverages' for drinks, 'dessert' for sweets, etc."},
        {"role": "user", "content": f"""
From the given text, identify:
- **category**: The main name or type of the recipe (like "chicken", "ice cream").
- **calories**: Number of calories, if mentioned.
- **time**: Time to cook, in minutes.
- **keywords**: Important words related to the recipe. If the category is not common (like "noodles" or "biryani"), include relevant characteristics (e.g., "asian", "main course", "stir fry", "quick meal", "wheat based", "high protein", etc).
- **keywords_name**: List of individual words from the category/name. For uncommon categories, include descriptive terms and related categories (e.g., for "noodles": ["asian", "pasta", "wheat", "main dish"]).

Examples:
---
Input: "noodles"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["asian", "stir fry", "wheat based", "quick meal", "main course", "pasta", "noodles"],
    "keywords_name": ["asian", "pasta", "main dish", "wheat"]
}}

---
Input: "biryani"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["rice", "indian", "spicy", "main course", "one dish meal", "biryani"],
    "keywords_name": ["rice", "indian", "spicy"]
}}

---
Input: "sushi"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["japanese", "rice", "seafood", "whitefish", "snack", "main course", "sushi"],
    "keywords_name": ["japanese", "seafood", "rice"]
}}

---
Input: "vegetable curry"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["vegan", "vegetarian", "spicy", "main course", "curry", "indian"],
    "keywords_name": ["indian", "vegetarian", "spicy"]
}}

---
Input: "quinoa salad"
Output: {{
    "category": "salad dressings",
    "calories": "",
    "time": "",
    "keywords": ["healthy", "salad", "gluten-free", "fiber", "low calorie", "vegan"],
    "keywords_name": ["healthy", "salad", "vegan"]
}}

---
Input: "beef tacos"
Output: {{
    "category": "beef organ meats",
    "calories": "",
    "time": "",
    "keywords": ["mexican", "beef", "spicy", "snack", "tortilla", "street food"],
    "keywords_name": ["mexican", "beef", "snack"]
}}

---
Input: "caesar salad"
Output: {{
    "category": "salad dressings",
    "calories": "",
    "time": "",
    "keywords": ["salad", "appetizer", "healthy", "vegetables", "parmesan", "croutons"],
    "keywords_name": ["salad", "appetizer", "healthy"]
}}


---
Input: "smoothie bowl"
Output: {{
    "category": "smoothies",
    "calories": "",
    "time": "",
    "keywords": ["breakfast", "healthy", "fruits", "smoothies", "vegan", "fiber"],
    "keywords_name": ["breakfast", "healthy", "fruits"]
}}

Input: "spaghetti bolognese"
Output: {{
    "category": "spaghetti",
    "calories": "",
    "time": "",
    "keywords": ["italian", "pasta", "meat", "tomato", "main course", "hearty"],
    "keywords_name": ["italian", "pasta", "meat"]
}}

---
Input: "I wish to cook chicken soup which contains around 200 calories within 30 mins"
Output: {{
    "category": "chicken",
    "calories": "200",
    "time": "30",
    "keywords": ["chicken", "soup", "200 calories", "30 mins"],
    "keywords_name": ["chicken", "soup"]
}}

---
Input: "Quick pasta recipe with 500 calories, ready in 20 mins"
Output: {{
    "category": "pasta shells",
    "calories": "500",
    "time": "20",
    "keywords": ["pasta shells", "500 calories", "20 mins"],
    "keywords_name": ["pasta shells"]
}}

---
Input: "uh i wish to cook something which contains protein"
Output: {{
    "category": "low protein",
    "calories": "",
    "time": "",
    "keywords": ["low protein", "high protein", "protein"],
    "keywords_name": ["low protein"]
}}

---
Input: "can you suggest something with low calories"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["low calories"],
    "keywords_name": ["low", "calories"]
}}

---
Input: "looking for a vegetarian recipe"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["vegetarian", "vegan"],
    "keywords_name": ["vegetarian"]
}}

---
Input: "need something gluten free"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["gluten free"],
    "keywords_name": ["gluten", "free"]
}}

---
Input: "want to make something dairy free"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["dairy free", "vegan"],
    "keywords_name": ["dairy", "free"]
}}

---
Input: "what can i cook for dinner"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["dinner", "vegan"],
    "keywords_name": [""]
}}

---
Input: "what can i cook for breakfast"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["breakfast", "vegan"],
    "keywords_name": [""]
}}

---
Input: "what can i cook for lunch"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["lunch", "quick meal", "vegan"],
    "keywords_name": [""]
}}

---
Input: "something with low carbs"
Output: {{
    "category": "very low carbs",
    "calories": "",
    "time": "",
    "keywords": ["very low carbs", "low carbs", "carbs"],
    "keywords_name": ["low", "carbs"]
}}

---
Input: "i wish to cook something in 30 minutes"
Output: {{
    "category": "",
    "calories": "",
    "time": "30",
    "keywords": ["30 minutes", "quick meal"],
    "keywords_name": [""]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "stew",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "catfish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "whitefish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "I wish to make fish and stew"
Output: {{
    "category": "crawfish",
    "calories": "",
    "time": "",
    "keywords": ["fish", "stew", "high protein"],
    "keywords_name": ["fish", "stew"]
}}

---
Input: "give some recipes involving almonds or dry fruits"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["almonds", "dry fruits"],
    "keywords_name": ["almonds", "dry fruits"]
}}

---
Input: "tea with milk, sugar, water"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["milk", "sugar", "water", "beverages"],
    "keywords_name": [""]
}}

---
Input: "chole bhature"
Output: {{
    "category": "",
    "calories": "",
    "time": "",
    "keywords": ["gluten free"],
    "keywords_name": ["gluten", "free"]
}}

---
Input: "something involving nuts"
Output: {{
    "category": "nuts",
    "calories": "",
    "time": "",
    "keywords": ["nuts", "snack", "healthy", "protein", "fiber"],
    "keywords_name": ["nuts", "snack", "healthy"]
}}

---
Now process this input:
Input: "{text}"
Output:
"""}
    ]

    # Send the prompt to OpenAI API (legacy pre-1.0 ChatCompletion
    # interface).  temperature=0 keeps extraction deterministic; 150 tokens
    # is enough for the small JSON object the prompt requests.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # Process the response (the model is expected to reply with bare JSON).
    output_text = response['choices'][0]['message']['content'].strip()

    try:
        result = json.loads(output_text)
        # Update category with closest match from dataset.
        # NOTE(review): assumes the model always includes a "category" key;
        # a KeyError here would propagate to the caller — confirm acceptable.
        original_category = result["category"]
        matched_category = find_closest_category(original_category)

        if matched_category:
            # If we found a match (exact or close), update category and
            # keywords_name with the canonical dataset spelling.
            result["category"] = matched_category
            if original_category != matched_category:
                result["keywords_name"] = matched_category.split()
        else:
            result["category"] = ""
            # Add additional context-based keywords if category is empty:
            # each branch appends hand-picked search terms for a niche input
            # the category list does not cover.  Branch order matters —
            # e.g. "smoothie bowl" must be tested before "smoothie".
            if "coffee" in text.lower() or "latte" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["coffee", "beverages", "caffeinated", "hot drink"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "caffeinated", "coffee"]
            elif "espresso" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "caffeinated", "espresso"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "espresso"]
            elif "smoothie bowl" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "healthy", "smoothie bowl"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "smoothie bowl"]
            elif "kombucha" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverage", "fermented", "kombucha"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "kombucha"]
            elif "herbal tea" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "caffeine-free", "herbal tea"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "herbal tea"]
            elif "seaweed" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["ingredient", "seafood", "seaweed"]
                result["keywords_name"] = result.get("keywords_name", []) + ["seaweed"]
            elif "vegan cheese" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["dairy-free", "vegan", "cheese"]
                result["keywords_name"] = result.get("keywords_name", []) + ["vegan cheese"]
            elif "air fryer" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["cooking method", "air fryer", "healthy"]
                result["keywords_name"] = result.get("keywords_name", []) + ["air fryer"]
            elif "instant pot" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["cooking method", "instant pot", "pressure cooker"]
                result["keywords_name"] = result.get("keywords_name", []) + ["instant pot"]
            elif "sous vide" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["cooking method", "sous vide", "precision cooking"]
                result["keywords_name"] = result.get("keywords_name", []) + ["sous vide"]
            elif "paleo" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["diet", "paleo", "low-carb"]
                result["keywords_name"] = result.get("keywords_name", []) + ["paleo"]
            elif "fodmap" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["diet", "fodmap", "digestive health"]
                result["keywords_name"] = result.get("keywords_name", []) + ["fodmap"]
            elif "cold brew" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "caffeinated", "cold coffee"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "cold brew"]
            elif "matcha" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "green tea", "matcha"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "matcha"]
            elif "smoothie" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "healthy", "smoothie"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "smoothie"]
            elif "protein shake" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverages", "high protein", "shake"]
                result["keywords_name"] = result.get("keywords_name", []) + ["beverages", "protein shake"]
            elif "oat milk" in text.lower() or "almond milk" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["dairy-free", "vegan", "plant-based milk"]
                result["keywords_name"] = result.get("keywords_name", []) + ["oat milk" if "oat" in text.lower() else "almond milk"]
            elif "zoodles" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["low carb", "gluten-free", "vegetable noodles", "noodles"]
                result["keywords_name"] = result.get("keywords_name", []) + ["zoodles", "noodles"]
            elif "avocado toast" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["breakfast", "healthy", "avocado"]
                result["keywords_name"] = result.get("keywords_name", []) + ["avocado toast"]
            elif "golden milk" in text.lower():
                result["keywords"] = result.get("keywords", []) + ["beverage", "turmeric", "anti-inflammatory"]
                result["keywords_name"] = result.get("keywords_name", []) + ["golden milk"]
            # Add other cases as needed

    except json.JSONDecodeError:
        # The model strayed from the JSON contract; surface its raw output
        # so the caller can log or display it.
        result = {"error": "Failed to parse JSON", "output": output_text}

    return result
|
| 667 |
+
|
| 668 |
+
# Example usage:
|
| 669 |
+
if __name__ == '__main__':
    # Ad-hoc smoke test for extract_recipe_attributes (hits the OpenAI API).
    # Previously exercised inputs, kept here for convenience:
    #   "noodles", "need a pasta recipe", "looking for a chicken dish",
    #   "want to make something with rice", "need a dessert recipe",
    #   "biryani", "30 mins", "chole bhature",
    #   "give some recipes involving almonds", "latte with foam, coffee, milk",
    #   "cold drink beverage", "beans", "coffee", "latte",
    #   "something involving nuts", "i wish to cook something with crab",
    #   "livers", "popcorn", "beef stew with potatoes, carrots, and herbs.",
    #   "dessert with chocolate, brownie, cake",
    #   "chocolate, brownie, cake, brown sugar",
    #   "avocado smoothie, avocado, milk, ice", "momo, momo, sauce",
    #   "I have basil, tomato and clove what can i make in 30 minutes"
    sample_queries = ["basil"]

    for query in sample_queries:
        print(f"\nTesting: {query}")
        extracted = extract_recipe_attributes(query)
        print(json.dumps(extracted, indent=2))
|
app/services/image_query.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
|
| 2 |
+
import google.generativeai as genai
|
| 3 |
+
import PIL.Image
|
| 4 |
+
import io
|
| 5 |
+
import os
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
app = Flask(__name__)
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
# Configure Gemini API - get key from https://makersuite.google.com/app/apikey
|
| 11 |
+
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
| 12 |
+
genai.configure(api_key=GOOGLE_API_KEY)
|
| 13 |
+
|
| 14 |
+
# Initialize the model - UPDATED MODEL NAME HERE
|
| 15 |
+
model = genai.GenerativeModel('gemini-1.5-flash') # Changed from gemini-pro-vision
|
| 16 |
+
|
| 17 |
+
def analyze_food_image(image_content) -> str:
    """
    Analyze image using Gemini API and return food description.

    Args:
        image_content: File-like object holding the uploaded image bytes
            (e.g. a Flask request file).  It is read exactly once.

    Returns:
        Lowercased comma-separated description ("dish, ingredient, ...").
        Falls back to "food dish" on an empty reply, or
        "food dish (Error: ...)" when the API call fails, so callers always
        receive a usable search string.
    """
    try:
        prompt = """
        Look at this food image and:
        1. Identify the main dish/food item
        2. List visible ingredients or components, including individual words/strings of the main dish
        3. Return ONLY a simple description in this format: [main dish], [ingredients]
        For example: "pizza, pizza, cheese, tomatoes, basil" or "chocolate cake, chocolate, cake, frosting, berries"
        """

        # Convert bytes to PIL Image
        image_bytes = image_content.read()
        image = PIL.Image.open(io.BytesIO(image_bytes))

        # Generate response using the module-level Gemini model
        response = model.generate_content([prompt, image])

        # Clean and format the response: lowercase and strip quote characters
        # so the text can be fed straight into the recipe keyword search.
        description = response.text.strip().lower()
        description = description.replace('"', '').replace("'", '')

        print(description)  # For testing purpose

        return description if description else "food dish"

    except Exception as e:
        # Broad catch is deliberate: an image-analysis failure should degrade
        # to a generic query rather than crash the request handler.
        print(f"Error in analysis: {str(e)}")
        return f"food dish (Error: {str(e)})"
|
| 48 |
+
|
| 49 |
+
if __name__ == '__main__':
    # No CLI entry point: this module is imported by the Flask routes and
    # only exposes analyze_food_image(); nothing runs standalone.
    pass
|
app/services/image_search.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import asyncio
|
| 3 |
+
import aiohttp
|
| 4 |
+
import random
|
| 5 |
+
import re
|
| 6 |
+
from typing import List, Union
|
| 7 |
+
from app.utils.scrapers.google_scraper import GoogleScraper
|
| 8 |
+
from app.utils.scrapers.food_network_scraper import FoodNetworkScraper
|
| 9 |
+
from app.utils.scrapers.allrecipes_scraper import AllRecipesScraper
|
| 10 |
+
from app.utils.scrapers.wikimedia_scraper import WikimediaScraper
|
| 11 |
+
from app.utils.scrapers.fooddotcom_scraper import FoodDotComScraper
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
class ImageSearchService:
    """Finds recipe images: database URLs first, then scrapers, then placeholders.

    Intended to be used as an async context manager so that every scraper
    shares one aiohttp session.
    """

    def __init__(self):
        # Scrapers run concurrently; their order only influences which
        # result ends up first after de-duplication.
        self.scrapers = [
            GoogleScraper(),
            FoodNetworkScraper(),
            AllRecipesScraper(),
            WikimediaScraper(),
            FoodDotComScraper()
        ]
        # Shared aiohttp session, created lazily in __aenter__.
        self.session = None
        # NOTE(review): these are Google Drive viewer pages, not direct
        # image URLs — confirm that clients can render them as images.
        self.placeholder_images = [
            "https://drive.google.com/file/d/1gYOjs06yiq7EUXaO19BE-L7MkrTR6wlc/view?usp=sharing",
            "https://drive.google.com/file/d/1ob4KbzVLtwsE_ckYKBu_70FLEXNCJRSr/view?usp=sharing",
            "https://drive.google.com/file/d/1UUv3zF1ouXteZVt8Oc_UXORcJrlWfRXR/view?usp=sharing"
        ]

    async def __aenter__(self):
        """Open the shared HTTP session and hand it to every scraper."""
        if self.session is None:
            self.session = aiohttp.ClientSession()
            for scraper in self.scrapers:
                scraper.session = self.session
            logger.info("ImageSearchService session initialized")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the shared session (if one was opened) on context exit."""
        if self.session:
            await self.session.close()
            self.session = None
            logger.info("ImageSearchService session closed")

    async def search_recipe_images(self, recipe_name: str, image_data: Union[str, float, int], num_images: int = 3) -> List[str]:
        """Return up to num_images image URLs for a recipe.

        Resolution order:
          1. URLs already stored in the dataset's image column (image_data).
          2. Concurrent scraper searches with a 60 s overall timeout.
          3. Random placeholder images, so callers always get something.
        """
        logger.info(f"Searching images for recipe: {recipe_name}")

        # First try to get existing URLs from the database
        existing_urls = self.extract_urls_from_image_column(image_data)
        if existing_urls:
            logger.info(f"Found {len(existing_urls)} existing URLs")
            return existing_urls[:num_images]

        try:
            # Try to get images from scrapers, all in parallel
            all_results = []
            tasks = []

            for scraper in self.scrapers:
                task = asyncio.create_task(scraper.search_images(recipe_name, num_images))
                tasks.append(task)

            logger.info(f"Created {len(tasks)} scraper tasks")
            done, pending = await asyncio.wait(tasks, timeout=60)

            # Anything still running after the timeout is abandoned.
            for task in pending:
                logger.warning(f"Cancelling pending task for {task.get_coro().__name__}")
                task.cancel()

            for task in done:
                try:
                    results = await task
                    logger.info(f"Scraper {task.get_coro().__name__} found {len(results)} images")
                    all_results.extend(results)
                except Exception as e:
                    # A single failing scraper must not sink the whole search.
                    logger.error(f"Error in scraper task {task.get_coro().__name__}: {str(e)}")

            # Get unique results, preserving first-seen order
            seen = set()
            unique_results = []
            for url in all_results:
                if url not in seen:
                    seen.add(url)
                    unique_results.append(url)

            if unique_results:
                logger.info(f"Found {len(unique_results)} unique image URLs")
                return unique_results[:num_images]

            # If no images found, return random placeholder images,
            # avoiding repeats while distinct placeholders remain.
            logger.info("No images found, using placeholder images")
            selected_placeholders = []
            for _ in range(num_images):
                placeholder = random.choice(self.placeholder_images)
                while placeholder in selected_placeholders and len(selected_placeholders) < len(self.placeholder_images):
                    placeholder = random.choice(self.placeholder_images)
                selected_placeholders.append(placeholder)

            return selected_placeholders

        except Exception as e:
            logger.error(f"Error in image search: {str(e)}")
            # Return placeholder images even in case of error
            return random.sample(self.placeholder_images, min(num_images, len(self.placeholder_images)))

    def extract_urls_from_image_column(self, image_data: Union[str, float, int]) -> List[str]:
        """Pull http(s) URLs out of the dataset's image column.

        The column may hold an R vector literal like c("url1", "url2"), a
        plain URL string, or a missing value (NaN / 'NA').
        """
        logger.debug(f"Extracting URLs from image data: {image_data}")
        # Numeric values are pandas NaN placeholders; treat them as missing.
        if image_data is None or image_data == 'NA' or isinstance(image_data, (float, int)):
            logger.debug("No valid image data found in database")
            return []

        try:
            image_data_str = str(image_data)
            urls = []
            if image_data_str.startswith('c(') and image_data_str.endswith(')'):
                # R vector format: extract the quoted elements.
                content = image_data_str[2:-1].strip()
                parts = re.findall(r'"([^"]*)"', content)
                urls = [url for url in parts if url.startswith('http')]
            else:
                # Plain-string fallback: grab anything that looks like a URL.
                urls = re.findall(r'https?://[^\s,"\')]+', image_data_str)

            logger.info(f"Extracted {len(urls)} URLs from image column")
            return urls
        except Exception as e:
            logger.error(f"Error extracting URLs from image data: {str(e)}")
            return []
|
app/services/recommendation.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from app.services.image_search import ImageSearchService
|
| 3 |
+
from app.utils.data_loading import load_or_create_data
|
| 4 |
+
from app.utils.recommendation_utils import get_top_recommendations
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class FlexibleRecipeRecommendationSystem:
    """Facade wiring the precomputed recipe data to the recommendation pipeline.

    Loads (or builds) the TF-IDF matrices, category dummies and scaler once
    at construction, then delegates every query to get_top_recommendations().
    """

    def __init__(self, csv_file_path, precomputed_dir):
        # Relative importance of each feature family when combining
        # similarity scores; the weights sum to 1.0.
        self.feature_weights = {
            'ingredients': 0.15, 'category': 0.25, 'dietary': 0.20,
            'calories': 0.10, 'time': 0.10, 'keywords': 0.10, 'keywords_name': 0.10
        }
        self.image_search_service = ImageSearchService()
        # Dict holding the dataframe, vectorizers, dummies, scaler and the
        # combined feature matrix; served from the precomputed_dir cache
        # when present.
        self.data = load_or_create_data(csv_file_path, precomputed_dir, self.feature_weights)

    async def get_recommendations(self, category=None, dietary_preference=None, ingredients=None,
                                  calories=None, time=None, keywords=None, keywords_name=None, top_n=6):
        """Return the top_n recommended recipes for the given filters.

        All filters are optional; unset ones are passed through as None for
        the underlying scorer to ignore.
        """
        return await get_top_recommendations(
            self.data['df'], self.data['combined_matrix'],
            self.data['tfidf_vectorizer_ingredients'],
            self.data['tfidf_vectorizer_keywords'],
            self.data['tfidf_vectorizer_keywords_name'],
            self.data['category_dummies'], self.data['scaler'],
            self.feature_weights, self.image_search_service,
            category, dietary_preference, ingredients,
            calories, time, keywords, keywords_name, top_n
        )
|
app/utils/__pycache__/data_loading.cpython-312.pyc
ADDED
|
Binary file (2.95 kB). View file
|
|
|
app/utils/__pycache__/data_preprocessing.cpython-312.pyc
ADDED
|
Binary file (4.73 kB). View file
|
|
|
app/utils/__pycache__/feature_engineering.cpython-312.pyc
ADDED
|
Binary file (6.39 kB). View file
|
|
|
app/utils/__pycache__/recommendation_utils.cpython-312.pyc
ADDED
|
Binary file (3.45 kB). View file
|
|
|
app/utils/__pycache__/similarity_calculation.cpython-312.pyc
ADDED
|
Binary file (1.31 kB). View file
|
|
|
app/utils/data_loading.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import joblib
|
| 3 |
+
from scipy.sparse import save_npz, load_npz
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from app.utils.data_preprocessing import preprocess_data
|
| 6 |
+
from app.utils.feature_engineering import create_feature_matrices
|
| 7 |
+
|
| 8 |
+
def load_or_create_data(csv_file_path, precomputed_dir, feature_weights):
    """Return the recommendation artifacts, rebuilding them on a cache miss.

    Checks that every expected joblib file plus the sparse matrix exists in
    precomputed_dir; if so loads them, otherwise recomputes from the CSV.
    """
    artifact_names = ('df', 'tfidf_vectorizer_ingredients', 'tfidf_vectorizer_keywords',
                      'tfidf_vectorizer_keywords_name', 'category_dummies', 'scaler')
    expected_paths = [os.path.join(precomputed_dir, f'{name}.joblib') for name in artifact_names]
    expected_paths.append(os.path.join(precomputed_dir, 'combined_matrix.npz'))

    if all(os.path.exists(path) for path in expected_paths):
        return load_precomputed_data(precomputed_dir)
    return compute_and_save_data(csv_file_path, precomputed_dir, feature_weights)
|
| 17 |
+
|
| 18 |
+
def load_precomputed_data(precomputed_dir):
    """Load the cached dataframe, vectorizers, scaler and sparse matrix."""
    joblib_names = ('df', 'tfidf_vectorizer_ingredients', 'tfidf_vectorizer_keywords',
                    'tfidf_vectorizer_keywords_name', 'category_dummies', 'scaler')
    data = {name: joblib.load(os.path.join(precomputed_dir, f'{name}.joblib'))
            for name in joblib_names}
    # The combined matrix is stored in scipy's sparse .npz format.
    data['combined_matrix'] = load_npz(os.path.join(precomputed_dir, 'combined_matrix.npz'))
    return data
|
| 25 |
+
|
| 26 |
+
def compute_and_save_data(csv_file_path, precomputed_dir, feature_weights):
    """Build all recommendation artifacts from the raw CSV and cache them.

    Preprocesses the recipe CSV, derives the TF-IDF vectorizers, category
    dummies, scaler and combined feature matrix, then persists everything
    under precomputed_dir (joblib for Python objects, .npz for the sparse
    matrix).  Returns the same dict load_precomputed_data() would produce.
    """
    frame = preprocess_data(pd.read_csv(csv_file_path))
    (combined_matrix,
     vec_ingredients,
     vec_keywords,
     vec_keywords_name,
     category_dummies,
     scaler) = create_feature_matrices(frame, feature_weights)

    os.makedirs(precomputed_dir, exist_ok=True)
    data = {
        'df': frame,
        'tfidf_vectorizer_ingredients': vec_ingredients,
        'tfidf_vectorizer_keywords': vec_keywords,
        'tfidf_vectorizer_keywords_name': vec_keywords_name,
        'category_dummies': category_dummies,
        'scaler': scaler,
        'combined_matrix': combined_matrix,
    }
    # The sparse matrix needs scipy's .npz format; everything else is joblib.
    save_npz(os.path.join(precomputed_dir, 'combined_matrix.npz'), combined_matrix)
    for name, obj in data.items():
        if name != 'combined_matrix':
            joblib.dump(obj, os.path.join(precomputed_dir, f'{name}.joblib'))
    return data
|
app/utils/data_preprocessing.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import ast
|
| 4 |
+
import logging
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
def parse_r_vector(s):
    """
    Parse an R vector literal such as c("word1", "word2") into a Python list.

    Already-parsed lists are returned unchanged; missing values and any
    other input types yield an empty list.

    Args:
        s: String in R vector format, an existing list, or NaN/None.

    Returns:
        List of non-empty strings, with R's 'NA' marker filtered out.
    """
    # BUG FIX: lists must be handled before the NA check. pd.isna() on a
    # multi-element list returns an ndarray, which is ambiguous in a bool
    # context and raised ValueError, making the list branch unreachable.
    if isinstance(s, list):
        return s
    if pd.isna(s):
        return []

    try:
        if isinstance(s, str) and s.startswith('c(') and s.endswith(')'):
            # Extract content between c( and )
            content = s[2:-1].strip()
            # Pull out every double-quoted item; commas inside quotes are safe.
            matches = re.findall(r'"([^"]*)"', content)
            # Filter out empty strings and NA values.
            return [item.strip() for item in matches
                    if item.strip() and item.lower() != 'na']
        return []
    except Exception as e:
        logger.warning(f"Error parsing R vector: {s}, Error: {str(e)}")
        return []
|
| 42 |
+
|
| 43 |
+
def preprocess_data(df):
    """
    Normalise the raw recipe dataframe in place.

    Boolean dietary flags become 0/1 ints, numeric columns are coerced
    and median-imputed, and list-like columns are parsed into real lists.

    Returns:
        The same dataframe, mutated in place.
    """
    truth_map = {'TRUE': 1, 'FALSE': 0, True: 1, False: 0}
    for flag in ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                 'is_low carb', 'is_keto', 'is_paleo']:
        df[flag] = df[flag].map(truth_map).fillna(0).astype(int)

    for numeric_col in ['Calories', 'TotalTime_minutes', 'AggregatedRating', 'ReviewCount']:
        coerced = pd.to_numeric(df[numeric_col], errors='coerce')
        df[numeric_col] = coerced.fillna(coerced.median())

    # Columns stored in R's c("...") vector notation.
    for r_col in ['RecipeIngredientParts', 'RecipeInstructions', 'RecipeIngredientQuantities']:
        df[r_col] = df[r_col].apply(parse_r_vector)

    # Columns stored as Python list literals.
    for plain_col in ['Keywords', 'keywords_name']:
        df[plain_col] = df[plain_col].apply(parse_list_string)

    return df
|
| 69 |
+
|
| 70 |
+
def parse_list_string(s):
    """
    Safely parse a list-like string into a Python list.

    Lists pass through unchanged and missing values become []. A plain
    string that is not a list literal is wrapped in a one-element list.
    """
    # BUG FIX: check for lists before pd.isna(); pd.isna on a
    # multi-element list returns an ndarray and raised ValueError.
    if isinstance(s, list):
        return s
    if pd.isna(s):
        return []
    try:
        if isinstance(s, str):
            parsed = ast.literal_eval(s)
            return parsed if isinstance(parsed, list) else [s]
        return []
    except (ValueError, SyntaxError):
        # Not a literal — treat a non-empty string as a single item.
        return [s] if s else []
|
| 85 |
+
|
| 86 |
+
def parse_recipe_ingredients(ingredient_parts):
    """
    Parse a RecipeIngredientParts field handling R vector format.

    Thin alias for parse_r_vector(), kept as a named entry point for
    callers that deal specifically with ingredient lists.
    """
    return parse_r_vector(ingredient_parts)
|
| 91 |
+
|
| 92 |
+
def parse_list_field(field):
    """
    Parse a list field, handling various input types including R vectors.

    Accepts existing lists, R c(...) vector strings, Python list
    literals, and missing values; anything else yields [].
    """
    # BUG FIX: list check must precede pd.isna(); pd.isna on a
    # multi-element list returns an ndarray and raised ValueError.
    if isinstance(field, list):
        return field
    if pd.isna(field):
        return []
    if isinstance(field, str):
        if field.startswith('c('):
            return parse_r_vector(field)
        try:
            parsed = ast.literal_eval(field)
            return parsed if isinstance(parsed, list) else []
        except (ValueError, SyntaxError):
            return []
    return []
|
app/utils/feature_engineering.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 2 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 3 |
+
from scipy.sparse import hstack
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
def create_feature_matrices(df, feature_weights):
    """
    Build the weighted sparse feature matrix used for similarity search.

    Segment layout of the combined matrix:
    [ingredients | category | dietary | calories | time | keywords | keywords_name | rating].

    Returns:
        (combined_matrix, tfidf_vectorizer_ingredients,
         tfidf_vectorizer_keywords, tfidf_vectorizer_keywords_name,
         category_dummies, scaler) — the fitted transformers are needed
        later to project a user query into the same feature space.
    """
    tfidf_vectorizer_ingredients = TfidfVectorizer(
        stop_words='english',
        max_features=5000,
        ngram_range=(1, 2),
        min_df=1
    )
    ingredients_text = df['RecipeIngredientParts'].apply(lambda x: ' '.join(x) if x else '')
    tfidf_matrix_ingredients = tfidf_vectorizer_ingredients.fit_transform(ingredients_text)

    tfidf_vectorizer_keywords = TfidfVectorizer(stop_words='english', max_features=3000)
    tfidf_vectorizer_keywords_name = TfidfVectorizer(stop_words='english', max_features=3000)

    keywords_text = df['Keywords'].apply(lambda x: ' '.join(x) if x else '')
    keywords_name_text = df['keywords_name'].apply(lambda x: ' '.join(x) if x else '')

    tfidf_matrix_keywords = tfidf_vectorizer_keywords.fit_transform(keywords_text)
    tfidf_matrix_keywords_name = tfidf_vectorizer_keywords_name.fit_transform(keywords_name_text)

    category_dummies = pd.get_dummies(df['RecipeCategory'])
    category_matrix = category_dummies.values

    dietary_columns = ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                       'is_low carb', 'is_keto', 'is_paleo']
    dietary_matrix = df[dietary_columns].values

    # BUG FIX: the original refit ONE shared scaler three times, so the
    # returned (and persisted) scaler ended up fitted on ratings, while
    # create_query_vector uses it to transform calorie/time queries.
    # Use a separate scaler per column and return the calorie-fitted one,
    # keeping the original return interface.
    scaler = MinMaxScaler()
    calories_matrix = scaler.fit_transform(df[['Calories']].values)
    time_matrix = MinMaxScaler().fit_transform(df[['TotalTime_minutes']].values)
    rating_matrix = MinMaxScaler().fit_transform(df[['AggregatedRating']].values)
    # NOTE(review): the query path still applies the single returned scaler
    # to both calories and time; fixing time too would require widening the
    # interface — confirm with callers.

    combined_matrix = hstack([
        tfidf_matrix_ingredients * feature_weights['ingredients'],
        category_matrix * feature_weights['category'],
        dietary_matrix * feature_weights['dietary'],
        calories_matrix * feature_weights['calories'],
        time_matrix * feature_weights['time'],
        tfidf_matrix_keywords * feature_weights['keywords'],
        tfidf_matrix_keywords_name * feature_weights['keywords_name'],
        rating_matrix * 0.05  # Small weight for ratings in base similarity
    ])

    return (combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
            tfidf_vectorizer_keywords_name, category_dummies, scaler)
|
| 55 |
+
|
| 56 |
+
def create_query_vector(combined_matrix, tfidf_vectorizer_ingredients, tfidf_vectorizer_keywords,
                        tfidf_vectorizer_keywords_name, category_dummies, scaler, feature_weights, **kwargs):
    """
    Project a user query into the combined feature space.

    Segment layout must mirror create_feature_matrices():
    [ingredients | category | dietary | calories | time | keywords | keywords_name | rating].

    Keyword args (all optional): ingredients, category,
    dietary_preference, calories, time, keywords, keywords_name.

    Returns:
        (1, n_features) dense numpy array aligned with combined_matrix.
    """
    query_vector = np.zeros((1, combined_matrix.shape[1]))
    current_position = 0

    # BUG FIX: the original advanced current_position only inside the
    # `if kwargs.get(...)` guards, so whenever ingredients or keywords
    # were omitted every later segment was written at the wrong offset.
    # Each segment's width is now added unconditionally.
    ingredients_width = len(tfidf_vectorizer_ingredients.vocabulary_)
    if kwargs.get('ingredients'):
        ingredients_query = tfidf_vectorizer_ingredients.transform([' '.join(kwargs['ingredients'])])
        query_vector[:, :ingredients_width] = ingredients_query.toarray() * feature_weights['ingredients']
    current_position += ingredients_width

    category_vector = np.zeros((1, category_dummies.shape[1]))
    if kwargs.get('category') and kwargs['category'] in category_dummies.columns:
        category_vector[0, category_dummies.columns.get_loc(kwargs['category'])] = 1
    query_vector[:, current_position:current_position + category_dummies.shape[1]] = (
        category_vector * feature_weights['category']
    )
    current_position += category_dummies.shape[1]

    dietary_columns = ['is_vegetarian', 'is_vegan', 'is_gluten free', 'is_dairy free',
                       'is_low carb', 'is_keto', 'is_paleo']
    dietary_vector = np.zeros((1, len(dietary_columns)))
    if kwargs.get('dietary_preference') in dietary_columns:
        dietary_vector[0, dietary_columns.index(kwargs['dietary_preference'])] = 1
    query_vector[:, current_position:current_position + len(dietary_columns)] = (
        dietary_vector * feature_weights['dietary']
    )
    current_position += len(dietary_columns)

    calories_vector = np.zeros((1, 1))
    time_vector = np.zeros((1, 1))
    if kwargs.get('calories'):
        calories_vector[0, 0] = kwargs['calories']
    if kwargs.get('time'):
        time_vector[0, 0] = kwargs['time']

    # NOTE(review): the single persisted scaler is applied to both columns
    # even though they were scaled independently at build time — confirm.
    calories_vector = scaler.transform(calories_vector)
    time_vector = scaler.transform(time_vector)

    query_vector[:, current_position:current_position + 1] = calories_vector * feature_weights['calories']
    current_position += 1
    query_vector[:, current_position:current_position + 1] = time_vector * feature_weights['time']
    current_position += 1

    keywords_width = len(tfidf_vectorizer_keywords.vocabulary_)
    if kwargs.get('keywords'):
        keywords_query = tfidf_vectorizer_keywords.transform([' '.join(kwargs['keywords'])])
        query_vector[:, current_position:current_position + keywords_width] = (
            keywords_query.toarray() * feature_weights['keywords']
        )
    current_position += keywords_width

    if kwargs.get('keywords_name'):
        keywords_name_query = tfidf_vectorizer_keywords_name.transform([' '.join(kwargs['keywords_name'])])
        query_vector[:, current_position:current_position + keywords_name_query.shape[1]] = (
            keywords_name_query.toarray() * feature_weights['keywords_name']
        )

    return query_vector
|
app/utils/recommendation_utils.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from app.models.recipe import Recipe
|
| 3 |
+
from app.utils.feature_engineering import create_query_vector
|
| 4 |
+
from app.utils.similarity_calculation import calculate_weighted_similarity
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
async def get_top_recommendations(df, combined_matrix, tfidf_vectorizer_ingredients,
                                  tfidf_vectorizer_keywords, tfidf_vectorizer_keywords_name,
                                  category_dummies, scaler, feature_weights, image_search_service,
                                  category=None, dietary_preference=None, ingredients=None,
                                  calories=None, time=None, keywords=None, keywords_name=None, top_n=5):
    """
    Return the top_n recipes most similar to the user's query, as Recipe models.

    Builds a query vector from the optional filters, scores every recipe
    with calorie/time-penalised cosine similarity, then fetches up to 3
    image URLs per shortlisted recipe via image_search_service.
    Image-search failures are logged and yield an empty image list for
    that recipe rather than aborting the request.
    """
    logger.info(f"Starting recommendation process for category: {category}, dietary_preference: {dietary_preference}")

    query_vector = create_query_vector(combined_matrix, tfidf_vectorizer_ingredients,
                                       tfidf_vectorizer_keywords, tfidf_vectorizer_keywords_name,
                                       category_dummies, scaler, feature_weights,
                                       category=category, dietary_preference=dietary_preference,
                                       ingredients=ingredients, calories=calories, time=time,
                                       keywords=keywords, keywords_name=keywords_name)

    similarity_scores = calculate_weighted_similarity(query_vector, combined_matrix, df, calories, time)

    # Zero out every recipe outside the requested category (boolean-mask multiply).
    if category:
        similarity_scores *= (df['RecipeCategory'] == category)

    # Shortlist 3x the requested count so the category re-check below
    # still leaves enough candidates.
    top_indices = similarity_scores.argsort()[-top_n*3:][::-1]
    logger.info(f"Found {len(top_indices)} potential recommendations")

    results = []
    # image_search_service is an async context manager that owns the HTTP session.
    async with image_search_service as image_service:
        for idx in top_indices:
            if len(results) >= top_n:
                break

            recipe = df.iloc[idx]

            # Defensive re-check; the mask above should already exclude these.
            if category and recipe['RecipeCategory'] != category:
                continue

            try:
                image_urls = await image_service.search_recipe_images(recipe['Name'], recipe['Images'], 3)
            except Exception as e:
                # Best-effort: a failed image lookup must not drop the recipe.
                logger.error(f"Error searching images for {recipe['Name']}: {str(e)}")
                image_urls = []

            results.append(Recipe(
                RecipeId=int(recipe['RecipeId']),
                Name=recipe['Name'],
                RecipeCategory=recipe['RecipeCategory'],
                RecipeIngredientParts=recipe['RecipeIngredientParts'],
                Keywords=recipe['Keywords'],
                keywords_name=recipe['keywords_name'],
                Calories=float(recipe['Calories']),
                TotalTime_minutes=int(recipe['TotalTime_minutes']),
                AggregatedRating=float(recipe['AggregatedRating']),
                ReviewCount=int(recipe['ReviewCount']),
                Description=recipe['Description'],
                RecipeIngredientQuantities=recipe['RecipeIngredientQuantities'],
                RecipeInstructions=recipe['RecipeInstructions'],
                Images=image_urls,
                Similarity=float(similarity_scores[idx])
            ))

    logger.info(f"Returning {len(results)} recommendations")
    return results[:top_n]
|
app/utils/scrapers/__pycache__/allrecipes_scraper.cpython-312.pyc
ADDED
|
Binary file (3.03 kB). View file
|
|
|
app/utils/scrapers/__pycache__/base_scraper.cpython-312.pyc
ADDED
|
Binary file (3 kB). View file
|
|
|
app/utils/scrapers/__pycache__/food_network_scraper.cpython-312.pyc
ADDED
|
Binary file (2.74 kB). View file
|
|
|
app/utils/scrapers/__pycache__/fooddotcom_scraper.cpython-312.pyc
ADDED
|
Binary file (3.87 kB). View file
|
|
|
app/utils/scrapers/__pycache__/google_scraper.cpython-312.pyc
ADDED
|
Binary file (3.11 kB). View file
|
|
|
app/utils/scrapers/__pycache__/wikimedia_scraper.cpython-312.pyc
ADDED
|
Binary file (2.92 kB). View file
|
|
|
app/utils/scrapers/allrecipes_scraper.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
from urllib.parse import quote
|
| 3 |
+
from typing import List
|
| 4 |
+
from .base_scraper import BaseScraper
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
class AllRecipesScraper(BaseScraper):
    """Scrape candidate recipe images from allrecipes.com search results."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to *num_images* verified image URLs for *recipe_name*."""
        url = f"https://www.allrecipes.com/search?q={quote(recipe_name)}"
        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                page = await response.text()

            # Collect every plausible image source, skipping obvious chrome.
            candidates = set()
            skip_markers = ('icon', 'logo', 'advertisement')
            for tag in BeautifulSoup(page, 'html.parser').find_all('img'):
                source = tag.get('src') or tag.get('data-src')
                if source and not any(marker in source.lower() for marker in skip_markers):
                    candidates.add(source)

            # Keep only URLs that HEAD-check as real images.
            verified = []
            for candidate in candidates:
                if len(verified) >= num_images:
                    break
                if await self.verify_image_url(candidate):
                    verified.append(candidate)
            return verified
        except Exception as e:
            logger.error(f"AllRecipes scraping error: {str(e)}")
            return []
|
app/utils/scrapers/base_scraper.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import aiohttp
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
from typing import List
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
class BaseScraper(ABC):
    """
    Common plumbing for async recipe-image scrapers.

    Subclasses implement search_images(). `session` is expected to be
    assigned by the owning service before any request is made — TODO
    confirm against the service that instantiates these scrapers.
    """

    def __init__(self):
        # Rotated per request to make scraping fingerprints less uniform.
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15',
        ]
        self.session = None

    async def get_headers(self):
        """Build browser-like request headers with a random User-Agent."""
        return {
            'User-Agent': random.choice(self.user_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.google.com/',
        }

    @abstractmethod
    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to num_images image URLs for the given recipe name."""

    async def verify_image_url(self, url: str) -> bool:
        """HEAD-check that *url* resolves to a real (non-placeholder) image."""
        try:
            async with self.session.head(url, allow_redirects=True, timeout=60) as response:
                content_type = response.headers.get('content-type', '')
                return (response.status == 200 and
                        'image' in content_type and
                        not any(x in url.lower() for x in ['placeholder', 'default', 'missing']))
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt and task cancellation. Narrowed to Exception.
        except Exception:
            return False
|
app/utils/scrapers/food_network_scraper.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
from urllib.parse import quote
|
| 3 |
+
from .base_scraper import BaseScraper
|
| 4 |
+
from typing import List
|
| 5 |
+
|
| 6 |
+
import logging
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class FoodNetworkScraper(BaseScraper):
    """Scrape candidate recipe images from foodnetwork.com search pages."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to *num_images* verified image URLs for *recipe_name*."""
        url = f"https://www.foodnetwork.com/search/{quote(recipe_name)}-"
        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                markup = await response.text()

            # Food Network lazy-loads images, so only data-src tags matter.
            found = {
                tag.get('data-src')
                for tag in BeautifulSoup(markup, 'html.parser').find_all('img', {'data-src': True})
                if tag.get('data-src') and 'thumbnail' not in tag.get('data-src').lower()
            }

            confirmed = []
            for link in found:
                if len(confirmed) >= num_images:
                    break
                if await self.verify_image_url(link):
                    confirmed.append(link)
            return confirmed
        except Exception as e:
            logger.error(f"Food Network scraping error: {str(e)}")
            return []
|
app/utils/scrapers/fooddotcom_scraper.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
import re
|
| 3 |
+
from typing import List
|
| 4 |
+
from urllib.parse import quote
|
| 5 |
+
from .base_scraper import BaseScraper
|
| 6 |
+
import logging
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class FoodDotComScraper(BaseScraper):
    """Scrape candidate recipe images from food.com search results."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to *num_images* verified image URLs for *recipe_name*."""
        url = f"https://www.food.com/search/{quote(recipe_name)}?pn=1"

        def enlarged(src):
            # Food.com encodes the image size in the URL; request the 800px variant.
            return re.sub(r's\d+-c', 's800-c', src)

        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                markup = await response.text()

            soup = BeautifulSoup(markup, 'html.parser')
            found = set()

            # Recipe cards usually hold the main images (lazy-loaded or not).
            for card in soup.find_all('div', {'class': 'recipe-card'}):
                for tag in card.find_all('img', {'data-src': True}):
                    lazy_src = tag.get('data-src')
                    if lazy_src:
                        found.add(enlarged(lazy_src))
                for tag in card.find_all('img', {'src': True}):
                    plain_src = tag.get('src')
                    if plain_src and not any(x in plain_src.lower() for x in ['icon', 'logo', 'advertisement']):
                        found.add(enlarged(plain_src))

            # Fall back to images in the main content when no cards matched.
            if not found:
                for tag in soup.find_all('img', {'class': 'recipe-image'}):
                    fallback_src = tag.get('src') or tag.get('data-src')
                    if fallback_src:
                        found.add(enlarged(fallback_src))

            confirmed = []
            for link in found:
                if len(confirmed) >= num_images:
                    break
                if await self.verify_image_url(link):
                    confirmed.append(link)
            return confirmed

        except Exception as e:
            logger.error(f"Food.com scraping error: {str(e)}")
            return []
|
app/utils/scrapers/google_scraper.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
import re
|
| 3 |
+
from urllib.parse import quote, unquote
|
| 4 |
+
from .base_scraper import BaseScraper
|
| 5 |
+
import logging
|
| 6 |
+
from typing import List
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
class GoogleScraper(BaseScraper):
    """Scrape candidate recipe images from Google image-search result pages."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to *num_images* verified image URLs for *recipe_name*."""
        query = f"{recipe_name} recipe food"
        url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
        try:
            async with self.session.get(url, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []
                markup = await response.text()

            # Image URLs live inside the AF_initDataCallback script payloads.
            found = set()
            for script in BeautifulSoup(markup, 'html.parser').find_all('script'):
                payload = script.string
                if payload and 'AF_initDataCallback' in payload:
                    for match in re.findall(r'(https?://\S+\.(?:jpg|jpeg|png))', payload):
                        found.add(unquote(match))

            # Keep only URLs that HEAD-check as real images.
            confirmed = []
            for link in found:
                if len(confirmed) >= num_images:
                    break
                if await self.verify_image_url(link):
                    confirmed.append(link)
            return confirmed
        except Exception as e:
            logger.error(f"Google scraping error: {str(e)}")
            return []
|
app/utils/scrapers/wikimedia_scraper.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from urllib.parse import quote
|
| 2 |
+
from typing import List
|
| 3 |
+
from .base_scraper import BaseScraper
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class WikimediaScraper(BaseScraper):
    """Find food images on Wikimedia Commons via the MediaWiki search API."""

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to *num_images* verified Commons file URLs for *recipe_name*."""
        url = "https://commons.wikimedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            # BUG FIX: the name was percent-quoted before being placed in
            # `params`, but aiohttp URL-encodes params itself, so spaces
            # became literal "%20" text in the search term (double
            # encoding). Pass the raw name instead.
            "srsearch": f"{recipe_name} food",
            "srnamespace": "6",  # File namespace
            "srlimit": num_images
        }

        try:
            async with self.session.get(url, params=params, headers=await self.get_headers()) as response:
                if response.status != 200:
                    return []

                data = await response.json()
                images = set()

                for item in data.get('query', {}).get('search', []):
                    title = item.get('title', '')
                    if title.startswith('File:'):
                        # Special:FilePath redirects to the actual file URL;
                        # the path segment DOES need quoting here.
                        file_url = f"https://commons.wikimedia.org/wiki/Special:FilePath/{quote(title[5:])}"
                        images.add(file_url)

                valid_images = []
                for img_url in images:
                    if len(valid_images) >= num_images:
                        break
                    if await self.verify_image_url(img_url):
                        valid_images.append(img_url)

                return valid_images
        except Exception as e:
            logger.error(f"Wikimedia scraping error: {str(e)}")
            return []
|
app/utils/similarity_calculation.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
def calculate_weighted_similarity(query_vector, combined_matrix, df, target_calories=None, target_time=None):
    """
    Calculate similarity scores between the query vector and all recipes.

    Cosine similarity is down-weighted linearly by how far each recipe's
    calories / total time are from the requested targets (penalty 1.0 at
    an exact match, falling toward 0 over one dataset-maximum of distance).
    """
    base_similarity = cosine_similarity(query_vector, combined_matrix).flatten()

    penalties = np.ones_like(base_similarity)

    if target_calories is not None:
        calorie_diff = np.abs(df['Calories'].values - target_calories)
        # BUG FIX: clamp at 0 — when a recipe is more than one dataset-max
        # away from the target, the linear penalty went negative and
        # flipped the similarity sign, ranking the worst matches first.
        penalties *= np.clip(1 - (calorie_diff / df['Calories'].max()), 0, None)

    if target_time is not None:
        time_diff = np.abs(df['TotalTime_minutes'].values - target_time)
        penalties *= np.clip(1 - (time_diff / df['TotalTime_minutes'].max()), 0, None)

    return base_similarity * penalties
|
config.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
class Config:
    """Application configuration constants."""

    # Absolute path to the raw recipe dataset shipped next to this file.
    CSV_FILE_PATH = os.path.join(os.path.dirname(__file__), 'recipe_dataset.csv')
    # Directory (relative to the working directory) holding cached artifacts.
    PRECOMPUTED_DIR = 'precomputed'
|
| 6 |
+
|
form_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
precomputed/category_dummies.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f16cabab6b55b1592c9645bd51bae137af0a740b165c983be6d2e70a5a4ce010
|
| 3 |
+
size 165342541
|
precomputed/combined_matrix.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6192a266f00edac60dc58ae9414d925313df00f32d51fa063816b69931f23ac
|
| 3 |
+
size 146356536
|
precomputed/df.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d96d26fed0f3b971dc44a540e4a55260e1ce3d966f1dd50b22e98bf8e08dafd9
|
| 3 |
+
size 799635866
|
precomputed/scaler.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ee2fe48286554095f1c37aaff1dd00cf264cce4ce89a6aadc04de24a421a365
|
| 3 |
+
size 719
|
precomputed/tfidf_vectorizer_ingredients.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7450c31c3bca4362d8f95282756baf6a97dc0b57c9c1b06328e06212a5bc9789
|
| 3 |
+
size 208006
|
precomputed/tfidf_vectorizer_keywords.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91c80bcf57cc31d546f0362cb4d54d0eff63ce903d026a45aaa287322afdeca4
|
| 3 |
+
size 15243
|
precomputed/tfidf_vectorizer_keywords_name.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eda812093fd905b029de6c59f3db10586d09570d8508b677cceb4f9a58bcadf
|
| 3 |
+
size 109419
|
recipe_dataset.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:076a78cbd94ba5b8b0cba19591b0d96edcedd8d020d0143aae63b81eda3d1e91
|
| 3 |
+
size 713412559
|
run.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# run.py
"""Flask entry point: build the app and enable CORS for the local frontend."""
from app import create_app
from flask_cors import CORS

app = create_app()
# Allow the React dev server (port 3000) to call the API during development.
CORS(app, resources={r"/*": {"origins": "http://localhost:3000"}})

if __name__ == '__main__':
    app.run(debug=True)
|