Spaces:
Sleeping
Sleeping
Update tools/recipe_tools.py
Browse files- tools/recipe_tools.py +67 -72
tools/recipe_tools.py
CHANGED
|
@@ -1,67 +1,54 @@
|
|
| 1 |
-
from
|
| 2 |
-
from typing import List, Dict
|
| 3 |
import re
|
| 4 |
from collections import defaultdict
|
| 5 |
from smolagents import tool
|
| 6 |
-
from bs4 import BeautifulSoup
|
| 7 |
-
import requests
|
| 8 |
-
|
| 9 |
-
@dataclass
|
| 10 |
-
class Recipe:
|
| 11 |
-
title: str
|
| 12 |
-
ingredients: List[Dict]
|
| 13 |
-
instructions: List[str]
|
| 14 |
-
prep_time: Optional[str] = None
|
| 15 |
-
cook_time: Optional[str] = None
|
| 16 |
-
servings: Optional[int] = None
|
| 17 |
-
source_url: Optional[str] = None
|
| 18 |
-
|
| 19 |
|
| 20 |
@tool
|
| 21 |
-
def
|
| 22 |
"""
|
| 23 |
-
|
| 24 |
|
| 25 |
Args:
|
| 26 |
-
|
| 27 |
|
| 28 |
Returns:
|
| 29 |
-
|
| 30 |
"""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
"
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
-
}
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
@tool
|
| 62 |
def organize_shopping_list(ingredients: List[Dict]) -> str:
|
| 63 |
"""
|
| 64 |
-
|
| 65 |
|
| 66 |
Args:
|
| 67 |
ingredients: List of ingredient dictionaries
|
|
@@ -70,10 +57,10 @@ def organize_shopping_list(ingredients: List[Dict]) -> str:
|
|
| 70 |
Formatted shopping list as a string
|
| 71 |
"""
|
| 72 |
categories = {
|
| 73 |
-
"
|
| 74 |
-
"Dairy & Eggs": ["milk", "
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
"Other": []
|
| 78 |
}
|
| 79 |
|
|
@@ -121,39 +108,47 @@ def extract_recipe_info(webpage_content: str) -> Dict:
|
|
| 121 |
webpage_content: HTML content from a recipe webpage
|
| 122 |
|
| 123 |
Returns:
|
| 124 |
-
Dictionary containing recipe
|
| 125 |
"""
|
| 126 |
-
|
| 127 |
"title": "",
|
| 128 |
-
"
|
|
|
|
| 129 |
"prep_time": "",
|
| 130 |
"cook_time": "",
|
| 131 |
-
"
|
| 132 |
-
"ingredients": [],
|
| 133 |
-
"shopping_list": ""
|
| 134 |
}
|
| 135 |
|
| 136 |
-
# Extract title
|
| 137 |
title_pattern = r'<h1[^>]*>(.*?)</h1>'
|
| 138 |
title_match = re.search(title_pattern, webpage_content)
|
| 139 |
if title_match:
|
| 140 |
-
|
| 141 |
|
| 142 |
-
# Extract
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
"prep_time": r'(?:Prep Time|Preparation Time):\s*(\d+\s*(?:min|hour|hr)s?)',
|
| 145 |
"cook_time": r'(?:Cook Time|Cooking Time):\s*(\d+\s*(?:min|hour|hr)s?)',
|
| 146 |
-
"
|
| 147 |
-
"servings": r'(?:Serves|Servings|Yield):\s*(\d+(?:-\d+)?)',
|
| 148 |
}
|
| 149 |
|
| 150 |
-
for key, pattern in
|
| 151 |
match = re.search(pattern, webpage_content, re.IGNORECASE)
|
| 152 |
if match:
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
# Get ingredients and create shopping list
|
| 156 |
-
metadata["ingredients"] = parse_recipe_ingredients(webpage_content)
|
| 157 |
-
metadata["shopping_list"] = organize_shopping_list(metadata["ingredients"])
|
| 158 |
|
| 159 |
-
return
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional
|
|
|
|
| 2 |
import re
|
| 3 |
from collections import defaultdict
|
| 4 |
from smolagents import tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def _parse_amount(amount_str) -> float:
    """Convert an amount string ("2", "1.5", "1/2") to a float.

    Returns 0.0 for missing, malformed, or zero-denominator amounts
    instead of raising (the original crashed on inputs like "1/2/3"
    with ValueError and "1/0" with ZeroDivisionError).
    """
    if not amount_str:
        return 0.0
    try:
        return float(amount_str)
    except ValueError:
        pass
    # Handle simple fractions like "1/2".
    parts = amount_str.split('/')
    if len(parts) == 2:
        try:
            return float(parts[0]) / float(parts[1])
        except (ValueError, ZeroDivisionError):
            return 0.0
    return 0.0


@tool
def parse_recipe_ingredients(recipe_text: str) -> List[Dict]:
    """
    Parses and structures recipe ingredients from text.

    Each non-empty line is treated as one ingredient and split into an
    optional numeric amount, an optional alphabetic unit, a name, and
    optional trailing parenthesised notes.

    Args:
        recipe_text: Text containing recipe ingredients, one per line

    Returns:
        List of dictionaries containing structured ingredient information,
        each with keys "name" (str), "amount" (float, 0.0 when absent or
        unparseable), "unit" (str), and "notes" (str)
    """
    # Compiled once outside the loop (the original rebuilt the pattern
    # per line). Groups: amount, unit, name (lazy), "(notes)".
    pattern = re.compile(r'^([\d./]+)?\s*([a-zA-Z]+)?\s*(.*?)(?:\((.*?)\))?$')

    ingredients = []
    for line in recipe_text.split('\n'):
        line = line.strip()
        if not line:
            continue

        match = pattern.match(line)
        if not match:
            continue

        amount_str, unit, name, notes = match.groups()
        ingredients.append({
            "name": name.strip() if name else "",
            "amount": _parse_amount(amount_str),
            "unit": unit.strip() if unit else "",
            "notes": notes.strip() if notes else ""
        })

    return ingredients
|
| 47 |
|
| 48 |
@tool
|
| 49 |
def organize_shopping_list(ingredients: List[Dict]) -> str:
|
| 50 |
"""
|
| 51 |
+
Creates an organized shopping list from ingredients.
|
| 52 |
|
| 53 |
Args:
|
| 54 |
ingredients: List of ingredient dictionaries
|
|
|
|
| 57 |
Formatted shopping list as a string
|
| 58 |
"""
|
| 59 |
categories = {
|
| 60 |
+
"Baking & Dry Goods": ["flour", "sugar", "baking", "powder", "soda", "cocoa", "chocolate", "nuts", "oats"],
|
| 61 |
+
"Dairy & Eggs": ["milk", "cream", "cheese", "butter", "egg", "yogurt"],
|
| 62 |
+
"Produce": ["fruit", "vegetable", "tomato", "onion", "garlic", "herb", "lettuce", "carrot"],
|
| 63 |
+
"Spices & Seasonings": ["salt", "pepper", "spice", "cinnamon", "vanilla", "seasoning"],
|
| 64 |
"Other": []
|
| 65 |
}
|
| 66 |
|
|
|
|
| 108 |
webpage_content: HTML content from a recipe webpage
|
| 109 |
|
| 110 |
Returns:
|
| 111 |
+
Dictionary containing recipe information
|
| 112 |
"""
|
| 113 |
+
recipe_info = {
|
| 114 |
"title": "",
|
| 115 |
+
"ingredients": [],
|
| 116 |
+
"instructions": [],
|
| 117 |
"prep_time": "",
|
| 118 |
"cook_time": "",
|
| 119 |
+
"servings": ""
|
|
|
|
|
|
|
| 120 |
}
|
| 121 |
|
| 122 |
+
# Extract title (looking for common patterns)
|
| 123 |
title_pattern = r'<h1[^>]*>(.*?)</h1>'
|
| 124 |
title_match = re.search(title_pattern, webpage_content)
|
| 125 |
if title_match:
|
| 126 |
+
recipe_info["title"] = re.sub(r'<[^>]+>', '', title_match.group(1)).strip()
|
| 127 |
|
| 128 |
+
# Extract ingredients section
|
| 129 |
+
ingredients_pattern = r'(?:Ingredients:|INGREDIENTS:)(.*?)(?:Instructions:|INSTRUCTIONS:|Directions:|DIRECTIONS:|Method:|$)'
|
| 130 |
+
ingredients_match = re.search(ingredients_pattern, webpage_content, re.DOTALL | re.IGNORECASE)
|
| 131 |
+
if ingredients_match:
|
| 132 |
+
ingredient_text = ingredients_match.group(1)
|
| 133 |
+
recipe_info["ingredients"] = parse_recipe_ingredients(ingredient_text)
|
| 134 |
+
|
| 135 |
+
# Extract instructions
|
| 136 |
+
instructions_pattern = r'(?:Instructions:|INSTRUCTIONS:|Directions:|DIRECTIONS:|Method:)(.*?)(?:Notes:|NOTES:|$)'
|
| 137 |
+
instructions_match = re.search(instructions_pattern, webpage_content, re.DOTALL | re.IGNORECASE)
|
| 138 |
+
if instructions_match:
|
| 139 |
+
instructions_text = instructions_match.group(1)
|
| 140 |
+
recipe_info["instructions"] = [step.strip() for step in instructions_text.split('\n') if step.strip()]
|
| 141 |
+
|
| 142 |
+
# Extract times and servings
|
| 143 |
+
time_patterns = {
|
| 144 |
"prep_time": r'(?:Prep Time|Preparation Time):\s*(\d+\s*(?:min|hour|hr)s?)',
|
| 145 |
"cook_time": r'(?:Cook Time|Cooking Time):\s*(\d+\s*(?:min|hour|hr)s?)',
|
| 146 |
+
"servings": r'(?:Serves|Servings|Yield):\s*(\d+(?:-\d+)?)'
|
|
|
|
| 147 |
}
|
| 148 |
|
| 149 |
+
for key, pattern in time_patterns.items():
|
| 150 |
match = re.search(pattern, webpage_content, re.IGNORECASE)
|
| 151 |
if match:
|
| 152 |
+
recipe_info[key] = match.group(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
+
return recipe_info
|