palltaruo committed on
Commit
2e07bcb
·
verified ·
1 Parent(s): 15cefb4

Update tools/recipe_tools.py

Browse files
Files changed (1) hide show
  1. tools/recipe_tools.py +67 -72
tools/recipe_tools.py CHANGED
@@ -1,67 +1,54 @@
1
- from dataclasses import dataclass
2
- from typing import List, Dict
3
  import re
4
  from collections import defaultdict
5
  from smolagents import tool
6
- from bs4 import BeautifulSoup
7
- import requests
8
-
9
- @dataclass
10
- class Recipe:
11
- title: str
12
- ingredients: List[Dict]
13
- instructions: List[str]
14
- prep_time: Optional[str] = None
15
- cook_time: Optional[str] = None
16
- servings: Optional[int] = None
17
- source_url: Optional[str] = None
18
-
19
 
20
  @tool
21
- def safe_webpage_visit(url: str) -> Dict:
22
  """
23
- Safely attempts to visit a webpage and extract recipe content with fallback options.
24
 
25
  Args:
26
- url: The URL of the recipe webpage
27
 
28
  Returns:
29
- Dictionary containing recipe information or error status
30
  """
31
- try:
32
- # First try using the provided VisitWebpageTool
33
- from tools.visit_webpage import VisitWebpageTool
34
- content = VisitWebpageTool().visit(url)
35
- return {
36
- "success": True,
37
- "content": content,
38
- "method": "visit_webpage_tool"
39
- }
40
- except Exception as e:
41
- try:
42
- # Fallback to requests if VisitWebpageTool fails
43
- headers = {
44
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
45
- }
46
- response = requests.get(url, headers=headers, timeout=10)
47
- response.raise_for_status()
48
- return {
49
- "success": True,
50
- "content": response.text,
51
- "method": "requests"
52
- }
53
- except Exception as e2:
54
- return {
55
- "success": False,
56
- "error": str(e2),
57
- "method": None
58
- }
59
-
 
60
 
61
  @tool
62
  def organize_shopping_list(ingredients: List[Dict]) -> str:
63
  """
64
- Organizes ingredients into a categorized shopping list.
65
 
66
  Args:
67
  ingredients: List of ingredient dictionaries
@@ -70,10 +57,10 @@ def organize_shopping_list(ingredients: List[Dict]) -> str:
70
  Formatted shopping list as a string
71
  """
72
  categories = {
73
- "Produce": ["vegetable", "fruit", "herb", "leafy", "tomato", "potato", "onion", "garlic"],
74
- "Dairy & Eggs": ["milk", "cheese", "cream", "butter", "yogurt", "egg"],
75
- "Meat & Seafood": ["chicken", "beef", "pork", "fish", "seafood", "turkey"],
76
- "Pantry Items": ["flour", "sugar", "oil", "vinegar", "sauce", "spice", "seasoning", "salt", "pepper"],
77
  "Other": []
78
  }
79
 
@@ -121,39 +108,47 @@ def extract_recipe_info(webpage_content: str) -> Dict:
121
  webpage_content: HTML content from a recipe webpage
122
 
123
  Returns:
124
- Dictionary containing recipe metadata and formatted ingredient list
125
  """
126
- metadata = {
127
  "title": "",
128
- "servings": "",
 
129
  "prep_time": "",
130
  "cook_time": "",
131
- "total_time": "",
132
- "ingredients": [],
133
- "shopping_list": ""
134
  }
135
 
136
- # Extract title
137
  title_pattern = r'<h1[^>]*>(.*?)</h1>'
138
  title_match = re.search(title_pattern, webpage_content)
139
  if title_match:
140
- metadata["title"] = re.sub(r'<[^>]+>', '', title_match.group(1)).strip()
141
 
142
- # Extract times and servings using common patterns
143
- patterns = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  "prep_time": r'(?:Prep Time|Preparation Time):\s*(\d+\s*(?:min|hour|hr)s?)',
145
  "cook_time": r'(?:Cook Time|Cooking Time):\s*(\d+\s*(?:min|hour|hr)s?)',
146
- "total_time": r'(?:Total Time):\s*(\d+\s*(?:min|hour|hr)s?)',
147
- "servings": r'(?:Serves|Servings|Yield):\s*(\d+(?:-\d+)?)',
148
  }
149
 
150
- for key, pattern in patterns.items():
151
  match = re.search(pattern, webpage_content, re.IGNORECASE)
152
  if match:
153
- metadata[key] = match.group(1)
154
-
155
- # Get ingredients and create shopping list
156
- metadata["ingredients"] = parse_recipe_ingredients(webpage_content)
157
- metadata["shopping_list"] = organize_shopping_list(metadata["ingredients"])
158
 
159
- return metadata
 
1
+ from typing import List, Dict, Optional
 
2
  import re
3
  from collections import defaultdict
4
  from smolagents import tool
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  @tool
7
def parse_recipe_ingredients(recipe_text: str) -> List[Dict]:
    """
    Parses and structures recipe ingredients from text.

    Every non-blank line is read as one ingredient written roughly as
    "[amount] [unit] name [(notes)]". Amounts may be integers ("2"),
    decimals ("0.5"), simple fractions ("3/4") or mixed numbers ("1 1/2").
    An amount that is missing or cannot be interpreted (for example "1/0"
    or "1/2/3") is reported as 0.0 instead of aborting the whole parse.
    When a line holds a single word after the amount (e.g. "2 eggs"), that
    word is reported as the ingredient name rather than as a unit.

    Args:
        recipe_text: Text containing recipe ingredients

    Returns:
        List of dictionaries containing structured ingredient information
    """
    # Amount alternatives are tried in order: a mixed number followed by
    # whitespace/end-of-line (so "2 1/2-inch pieces" still reads as amount 2),
    # then a loose run of digits, dots and slashes.
    line_pattern = re.compile(
        r'^(\d+\s+\d+/\d+(?=\s|$)|[\d./]+)?\s*([a-zA-Z]+)?\s*(.*?)(?:\((.*?)\))?$'
    )

    ingredients = []
    for raw_line in recipe_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        match = line_pattern.match(line)
        if not match:
            continue
        amount_str, unit, name, notes = match.groups()

        # Sum the whitespace-separated parts so "1 1/2" becomes 1.5; any
        # malformed part (empty operand, extra slash, zero denominator)
        # makes the whole amount fall back to 0.0.
        amount = 0.0
        if amount_str:
            try:
                for part in amount_str.split():
                    if '/' in part:
                        numerator, denominator = part.split('/')
                        amount += float(numerator) / float(denominator)
                    else:
                        amount += float(part)
            except (ValueError, ZeroDivisionError):
                amount = 0.0

        name = name.strip() if name else ""
        unit = unit.strip() if unit else ""
        notes = notes.strip() if notes else ""

        # The unit group greedily captures the first word; if nothing is
        # left for the name, that word was the ingredient itself.
        if unit and not name:
            name, unit = unit, ""

        ingredients.append({
            "name": name,
            "amount": amount,
            "unit": unit,
            "notes": notes,
        })

    return ingredients
47
 
48
  @tool
49
  def organize_shopping_list(ingredients: List[Dict]) -> str:
50
  """
51
+ Creates an organized shopping list from ingredients.
52
 
53
  Args:
54
  ingredients: List of ingredient dictionaries
 
57
  Formatted shopping list as a string
58
  """
59
  categories = {
60
+ "Baking & Dry Goods": ["flour", "sugar", "baking", "powder", "soda", "cocoa", "chocolate", "nuts", "oats"],
61
+ "Dairy & Eggs": ["milk", "cream", "cheese", "butter", "egg", "yogurt"],
62
+ "Produce": ["fruit", "vegetable", "tomato", "onion", "garlic", "herb", "lettuce", "carrot"],
63
+ "Spices & Seasonings": ["salt", "pepper", "spice", "cinnamon", "vanilla", "seasoning"],
64
  "Other": []
65
  }
66
 
 
108
  webpage_content: HTML content from a recipe webpage
109
 
110
  Returns:
111
+ Dictionary containing recipe information
112
  """
113
+ recipe_info = {
114
  "title": "",
115
+ "ingredients": [],
116
+ "instructions": [],
117
  "prep_time": "",
118
  "cook_time": "",
119
+ "servings": ""
 
 
120
  }
121
 
122
+ # Extract title (looking for common patterns)
123
  title_pattern = r'<h1[^>]*>(.*?)</h1>'
124
  title_match = re.search(title_pattern, webpage_content)
125
  if title_match:
126
+ recipe_info["title"] = re.sub(r'<[^>]+>', '', title_match.group(1)).strip()
127
 
128
+ # Extract ingredients section
129
+ ingredients_pattern = r'(?:Ingredients:|INGREDIENTS:)(.*?)(?:Instructions:|INSTRUCTIONS:|Directions:|DIRECTIONS:|Method:|$)'
130
+ ingredients_match = re.search(ingredients_pattern, webpage_content, re.DOTALL | re.IGNORECASE)
131
+ if ingredients_match:
132
+ ingredient_text = ingredients_match.group(1)
133
+ recipe_info["ingredients"] = parse_recipe_ingredients(ingredient_text)
134
+
135
+ # Extract instructions
136
+ instructions_pattern = r'(?:Instructions:|INSTRUCTIONS:|Directions:|DIRECTIONS:|Method:)(.*?)(?:Notes:|NOTES:|$)'
137
+ instructions_match = re.search(instructions_pattern, webpage_content, re.DOTALL | re.IGNORECASE)
138
+ if instructions_match:
139
+ instructions_text = instructions_match.group(1)
140
+ recipe_info["instructions"] = [step.strip() for step in instructions_text.split('\n') if step.strip()]
141
+
142
+ # Extract times and servings
143
+ time_patterns = {
144
  "prep_time": r'(?:Prep Time|Preparation Time):\s*(\d+\s*(?:min|hour|hr)s?)',
145
  "cook_time": r'(?:Cook Time|Cooking Time):\s*(\d+\s*(?:min|hour|hr)s?)',
146
+ "servings": r'(?:Serves|Servings|Yield):\s*(\d+(?:-\d+)?)'
 
147
  }
148
 
149
+ for key, pattern in time_patterns.items():
150
  match = re.search(pattern, webpage_content, re.IGNORECASE)
151
  if match:
152
+ recipe_info[key] = match.group(1)
 
 
 
 
153
 
154
+ return recipe_info