Prathamesh Sable commited on
Commit
21c0d12
·
1 Parent(s): 4b571a2

modularization of ingredient agent

Browse files
migrations/versions/00248bed0fb5_updated_product.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """updated product
2
+
3
+ Revision ID: 00248bed0fb5
4
+ Revises: a193e9cfa8c5
5
+ Create Date: 2025-04-27 13:26:01.243225
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
# revision identifiers, used by Alembic.
revision: str = '00248bed0fb5'
# Parent revision this migration applies on top of.
down_revision: Union[str, None] = 'a193e9cfa8c5'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    # New analysis/ownership columns for the products table, added in the
    # same order the autogenerated script emitted them.
    added_columns = (
        ("overall_safety_score", sa.Integer()),
        ("suitable_diet_types", sa.String()),
        ("allergy_warnings", sa.JSON()),
        ("usage_recommendations", sa.String()),
        ("health_insights", sa.JSON()),
        ("ingredient_interactions", sa.JSON()),
        ("key_takeaway", sa.String()),
        ("ingredients_count", sa.Integer()),
        ("user_id", sa.Integer()),
        ("timestamp", sa.DateTime()),
        ("ingredient_ids", sa.JSON()),
    )
    for column_name, column_type in added_columns:
        op.add_column('products', sa.Column(column_name, column_type, nullable=True))

    # Columns superseded by the new ingredient-analysis model.
    for column_name in (
        'brands',
        'ingredients_text',
        'nutrient_levels',
        'nutriments',
        'nutriscore',
        'generic_name',
    ):
        op.drop_column('products', column_name)
    # ### end Alembic commands ###
42
+
43
+
44
def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Restore the columns removed by upgrade(), in the original emit order.
    restored_columns = (
        ('generic_name', sa.VARCHAR()),
        ('nutriscore', postgresql.JSON(astext_type=sa.Text())),
        ('nutriments', postgresql.JSON(astext_type=sa.Text())),
        ('nutrient_levels', postgresql.JSON(astext_type=sa.Text())),
        ('ingredients_text', sa.VARCHAR()),
        ('brands', sa.VARCHAR()),
    )
    for column_name, column_type in restored_columns:
        op.add_column('products', sa.Column(column_name, column_type, autoincrement=False, nullable=True))

    # Drop every column introduced by upgrade().
    for column_name in (
        'ingredient_ids',
        'timestamp',
        'user_id',
        'ingredients_count',
        'key_takeaway',
        'ingredient_interactions',
        'health_insights',
        'usage_recommendations',
        'allergy_warnings',
        'suitable_diet_types',
        'overall_safety_score',
    ):
        op.drop_column('products', column_name)
    # ### end Alembic commands ###
services/ingredientFinderAgent.py CHANGED
@@ -3,262 +3,21 @@ from functools import partial
3
  import os
4
  import json
5
  import traceback
6
- import requests
7
- import pandas as pd
8
  from dotenv import load_dotenv
9
- import aiohttp
10
- import time
11
-
12
  from typing import Dict, Any
 
13
  from langchain_google_genai import ChatGoogleGenerativeAI
14
- from langchain_community.tools import DuckDuckGoSearchRun
15
- from langchain_community.tools import WikipediaQueryRun
16
- from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
17
- from langchain_core.tools import tool
18
 
19
  # modular
20
- from logger_manager import logger
21
  from interfaces.ingredientModels import IngredientAnalysisResult,IngredientState
 
 
22
 
23
  # Load environment variables from .env file
24
  load_dotenv()
25
 
26
- # Load Scraped Database
27
- SCRAPED_DB_PATH = "data/Food_Aditives_E_numbers.csv" # Ensure this file exists
28
- if os.path.exists(SCRAPED_DB_PATH):
29
- additives_df = pd.read_csv(SCRAPED_DB_PATH)
30
- logger.info(f"Loaded database with {len(additives_df)} entries")
31
- else:
32
- additives_df = None
33
- logger.warning("Scraped database not found!")
34
-
35
-
36
- # Define a rate limit (adjust as needed)
37
- PUBCHEM_TIMEOUT = float(os.getenv("PUBCHEM_TIMEOUT", "2.0")) # seconds
38
- PUBCHEM_MAX_RETRIES = int(os.getenv("PUBCHEM_MAX_RETRIES", "3")) # Max retries
39
-
40
- # Rate limiting configuration
41
- DUCKDUCKGO_RATE_LIMIT_DELAY = float(os.getenv("DUCKDUCKGO_RATE_LIMIT_DELAY", "2.0")) # Delay in seconds
42
- DUCKDUCKGO_MAX_RETRIES = int(os.getenv("DUCKDUCKGO_MAX_RETRIES", "3")) # Max retries
43
 
44
 
45
- # Define tool functions
46
- @tool("search_local_db")
47
- def search_local_db(ingredient: str) -> Dict[str, Any]:
48
- """Search local database for ingredient information. E number database scrapped"""
49
- logger.info(f"Searching local DB for: {ingredient}")
50
- if additives_df is not None:
51
- match = additives_df[additives_df['Name of Additive'].str.contains(ingredient, case=False, na=False, regex=False)]
52
- if not match.empty:
53
- return {"source": "Local DB", "found": True, "data": match.iloc[0].to_dict()}
54
- return {"source": "Local DB", "found": False, "data": None}
55
-
56
- @tool("search_open_food_facts")
57
- def search_open_food_facts(ingredient: str) -> Dict[str, Any]:
58
- """Search Open Food Facts database for ingredient information."""
59
- logger.info(f"Searching Open Food Facts for: {ingredient}")
60
-
61
- try:
62
- open_food_facts_api = "https://world.openfoodfacts.org/api/v0"
63
- # Search for the ingredient
64
- search_url = f"{open_food_facts_api}/ingredient/{ingredient.lower().replace(' ', '-')}.json"
65
- response = requests.get(search_url, timeout=10)
66
-
67
- if response.status_code == 200:
68
- data = response.json()
69
- if data.get("status") == 1: # Successfully found
70
- return {
71
- "source": "Open Food Facts",
72
- "found": True,
73
- "data": data
74
- }
75
-
76
- # Try searching products containing this ingredient
77
- product_search_url = f"{open_food_facts_api}/search.json?ingredients_tags={ingredient.lower().replace(' ', '_')}&page_size=5"
78
- response = requests.get(product_search_url, timeout=10)
79
-
80
- if response.status_code == 200:
81
- data = response.json()
82
- if data.get("count") > 0:
83
- return {
84
- "source": "Open Food Facts Products",
85
- "found": True,
86
- "data": data
87
- }
88
-
89
- return {"source": "Open Food Facts", "found": False, "data": None}
90
-
91
- except Exception as e:
92
- logger.error(f"Error searching Open Food Facts: {e}")
93
- return {"source": "Open Food Facts", "found": False, "error": str(e)}
94
-
95
- @tool("search_usda")
96
- def search_usda(ingredient: str) -> Dict[str, Any]:
97
- """Search USDA FoodData Central for ingredient information."""
98
- logger.info(f"Searching USDA for: {ingredient}")
99
-
100
- try:
101
- usda_api = "https://api.nal.usda.gov/fdc/v1"
102
- usda_api_key = os.getenv("USDA_API_KEY", "DEMO_KEY") # Use DEMO_KEY if not provided
103
-
104
- # Search for the ingredient
105
- search_url = f"{usda_api}/foods/search"
106
- params = {
107
- "api_key": usda_api_key,
108
- "query": ingredient,
109
- "dataType": ["Foundation", "SR Legacy", "Branded"],
110
- "pageSize": 5
111
- }
112
-
113
- response = requests.get(search_url, params=params, timeout=10)
114
-
115
- if response.status_code == 200:
116
- data = response.json()
117
- if data.get("totalHits", 0) > 0:
118
- return {
119
- "source": "USDA FoodData Central",
120
- "found": True,
121
- "data": data
122
- }
123
-
124
- return {"source": "USDA FoodData Central", "found": False, "data": None}
125
-
126
- except Exception as e:
127
- logger.error(f"Error searching USDA: {e}")
128
- return {"source": "USDA FoodData Central", "found": False, "error": str(e)}
129
-
130
- async def async_search_pubchem(ingredient: str) -> Dict[str, Any]:
131
- """Asynchronously search PubChem for chemical information about the ingredient."""
132
- logger.info(f"Searching PubChem for: {ingredient}")
133
-
134
- try:
135
- pubchem_api = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data"
136
- # https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Input
137
-
138
- async with aiohttp.ClientSession() as session:
139
- # First try to get compound information by name
140
- search_url = f"{pubchem_api}/compound/name/{ingredient}/JSON"
141
-
142
- async def fetch_data(url: str, timeout: int = PUBCHEM_TIMEOUT, retry_count: int = 0):
143
- try:
144
- async with session.get(url, timeout=timeout) as response:
145
- if response.status == 200:
146
- return await response.json()
147
- else:
148
- logger.warning(f"PubChem returned status: {response.status} for URL: {url}")
149
- return None
150
- except asyncio.TimeoutError:
151
- if retry_count < PUBCHEM_MAX_RETRIES:
152
- delay = (2 ** retry_count) * 5 # Exponential backoff
153
- logger.warning(f"PubChem timeout for URL '{url}'. Retrying in {delay:.2f} seconds (attempt {retry_count + 1}/{PUBCHEM_MAX_RETRIES})")
154
- await asyncio.sleep(delay)
155
- return await fetch_data(url, timeout, retry_count + 1) # Recursive retry
156
- else:
157
- logger.error(f"Max retries reached for PubChem timeout on URL: {url}")
158
- return None
159
- except Exception as e:
160
- logger.error(f"PubChem error for URL '{url}': {e}")
161
- return None
162
-
163
- data = await fetch_data(search_url)
164
-
165
- if data and "PC_Compounds" in data:
166
- compound_id = data["PC_Compounds"][0]["id"]["id"]["cid"]
167
-
168
- # Get more detailed information using the CID
169
- property_url = f"{pubchem_api}/compound/cid/{compound_id}/property/MolecularFormula,MolecularWeight,IUPACName,InChI,InChIKey,CanonicalSMILES/JSON"
170
- properties_data = await fetch_data(property_url)
171
-
172
- # Get classifications and categories
173
- classification_url = f"{pubchem_api}/compound/cid/{compound_id}/classification/JSON"
174
- classification_data = await fetch_data(classification_url)
175
-
176
- return {
177
- "source": "PubChem",
178
- "found": True,
179
- "data": {
180
- "compound_info": data,
181
- "properties": properties_data,
182
- "classification": classification_data
183
- }
184
- }
185
-
186
- return {"source": "PubChem", "found": False, "data": None}
187
-
188
- except Exception as e:
189
- logger.error(f"Error searching PubChem: {e}")
190
- return {"source": "PubChem", "found": False, "error": str(e)}
191
-
192
- @tool("search_pubchem")
193
- def search_pubchem(ingredient: str) -> Dict[str, Any]:
194
- """Search PubChem for chemical information about the ingredient."""
195
- # Use asyncio.run to handle the async operation from synchronous code
196
- try:
197
- # For Python 3.7+
198
- return asyncio.run(async_search_pubchem(ingredient))
199
- except RuntimeError:
200
- # If already in an event loop (e.g., in FastAPI)
201
- loop = asyncio.get_event_loop()
202
- return loop.run_until_complete(async_search_pubchem(ingredient))
203
-
204
- @tool("search_wikipedia")
205
- def search_wikipedia(ingredient: str) -> Dict[str, Any]:
206
- """Search Wikipedia for ingredient information."""
207
- logger.info(f"Searching Wikipedia for: {ingredient}")
208
-
209
- try:
210
- wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
211
- wiki_result = wikipedia.run(ingredient)
212
-
213
- if wiki_result and len(wiki_result) > 100: # Only count substantial results
214
- return {
215
- "source": "Wikipedia",
216
- "found": True,
217
- "data": wiki_result
218
- }
219
- else:
220
- # Try with more specific searches
221
- food_wiki = wikipedia.run(f"{ingredient} food additive")
222
- if food_wiki and len(food_wiki) > 100:
223
- return {
224
- "source": "Wikipedia",
225
- "found": True,
226
- "data": food_wiki
227
- }
228
-
229
- chemical_wiki = wikipedia.run(f"{ingredient} chemical compound")
230
- if chemical_wiki and len(chemical_wiki) > 100:
231
- return {
232
- "source": "Wikipedia",
233
- "found": True,
234
- "data": chemical_wiki
235
- }
236
-
237
- return {"source": "Wikipedia", "found": False, "data": None}
238
-
239
- except Exception as e:
240
- logger.error(f"Error searching Wikipedia: {e}")
241
- return {"source": "Wikipedia", "found": False, "error": str(e)}
242
-
243
- @tool("search_web")
244
- def search_web(ingredient: str) -> Dict[str, Any]:
245
- """Search web for ingredient information using DuckDuckGo."""
246
- logger.info(f"Searching web for: {ingredient}")
247
-
248
- try:
249
- duckduckgo = DuckDuckGoSearchRun()
250
- search_queries = [f"{ingredient} food ingredient safety", f"{ingredient} E-number food additive",f"{ingredient}'s allergic information",f"is {ingredient} vegan,vegetarian or Non-vegetarian"]
251
- all_results = []
252
- for query in search_queries:
253
- time.sleep(DUCKDUCKGO_RATE_LIMIT_DELAY)
254
- result = duckduckgo.run(query)
255
- if result:
256
- all_results.append({"query": query, "result": result})
257
- return {"source": "DuckDuckGo", "found": bool(all_results), "data": all_results}
258
- except Exception as e:
259
- logger.error(f"Web search error: {e}")
260
- return {"source": "DuckDuckGo", "found": False, "error": str(e)}
261
-
262
  def create_summary_from_source(source: Dict[str, Any]) -> str:
263
  """Create a meaningful summary from source data."""
264
  source_name = source.get("source", "Unknown")
 
3
  import os
4
  import json
5
  import traceback
 
 
6
  from dotenv import load_dotenv
 
 
 
7
  from typing import Dict, Any
8
+
9
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
 
10
 
11
  # modular
 
12
  from interfaces.ingredientModels import IngredientAnalysisResult,IngredientState
13
+ from logger_manager import logger
14
+ from utils.agent_tools import search_local_db,search_web,search_wikipedia,search_open_food_facts,search_usda,search_pubchem
15
 
16
  # Load environment variables from .env file
17
  load_dotenv()
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def create_summary_from_source(source: Dict[str, Any]) -> str:
22
  """Create a meaningful summary from source data."""
23
  source_name = source.get("source", "Unknown")
utils/agent_tools.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import asyncio
import os
import time
from typing import Dict, Any

# Third-party
import aiohttp
import pandas as pd
import requests
from dotenv import load_dotenv
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain_core.tools import tool

# Local / modular
from logger_manager import logger
23
+
24
# Load environment variables from .env file
load_dotenv()

# Load Scraped Database
# NOTE(review): "Aditives" is misspelled but must match the file on disk —
# confirm before renaming either side.
SCRAPED_DB_PATH = "data/Food_Aditives_E_numbers.csv"  # Ensure this file exists
if os.path.exists(SCRAPED_DB_PATH):
    additives_df = pd.read_csv(SCRAPED_DB_PATH)
    logger.info(f"Loaded database with {len(additives_df)} entries")
else:
    # search_local_db checks for None before querying, so a missing CSV
    # degrades to "not found" rather than crashing at import time.
    additives_df = None
    logger.warning("Scraped database not found!")


# Define a rate limit (adjust as needed)
PUBCHEM_TIMEOUT = float(os.getenv("PUBCHEM_TIMEOUT", "2.0"))  # seconds
PUBCHEM_MAX_RETRIES = int(os.getenv("PUBCHEM_MAX_RETRIES", "3"))  # Max retries

# Rate limiting configuration
DUCKDUCKGO_RATE_LIMIT_DELAY = float(os.getenv("DUCKDUCKGO_RATE_LIMIT_DELAY", "2.0"))  # Delay in seconds
# NOTE(review): not referenced anywhere in this module yet — dead config?
DUCKDUCKGO_MAX_RETRIES = int(os.getenv("DUCKDUCKGO_MAX_RETRIES", "3"))  # Max retries
44
+
45
+
46
+ # Define tool functions
47
+ @tool("search_local_db")
48
+ def search_local_db(ingredient: str) -> Dict[str, Any]:
49
+ """Search local database for ingredient information. E number database scrapped"""
50
+ logger.info(f"Searching local DB for: {ingredient}")
51
+ if additives_df is not None:
52
+ match = additives_df[additives_df['Name of Additive'].str.contains(ingredient, case=False, na=False, regex=False)]
53
+ if not match.empty:
54
+ return {"source": "Local DB", "found": True, "data": match.iloc[0].to_dict()}
55
+ return {"source": "Local DB", "found": False, "data": None}
56
+
57
+ @tool("search_open_food_facts")
58
+ def search_open_food_facts(ingredient: str) -> Dict[str, Any]:
59
+ """Search Open Food Facts database for ingredient information."""
60
+ logger.info(f"Searching Open Food Facts for: {ingredient}")
61
+
62
+ try:
63
+ open_food_facts_api = "https://world.openfoodfacts.org/api/v0"
64
+ # Search for the ingredient
65
+ search_url = f"{open_food_facts_api}/ingredient/{ingredient.lower().replace(' ', '-')}.json"
66
+ response = requests.get(search_url, timeout=10)
67
+
68
+ if response.status_code == 200:
69
+ data = response.json()
70
+ if data.get("status") == 1: # Successfully found
71
+ return {
72
+ "source": "Open Food Facts",
73
+ "found": True,
74
+ "data": data
75
+ }
76
+
77
+ # Try searching products containing this ingredient
78
+ product_search_url = f"{open_food_facts_api}/search.json?ingredients_tags={ingredient.lower().replace(' ', '_')}&page_size=5"
79
+ response = requests.get(product_search_url, timeout=10)
80
+
81
+ if response.status_code == 200:
82
+ data = response.json()
83
+ if data.get("count") > 0:
84
+ return {
85
+ "source": "Open Food Facts Products",
86
+ "found": True,
87
+ "data": data
88
+ }
89
+
90
+ return {"source": "Open Food Facts", "found": False, "data": None}
91
+
92
+ except Exception as e:
93
+ logger.error(f"Error searching Open Food Facts: {e}")
94
+ return {"source": "Open Food Facts", "found": False, "error": str(e)}
95
+
96
+ @tool("search_usda")
97
+ def search_usda(ingredient: str) -> Dict[str, Any]:
98
+ """Search USDA FoodData Central for ingredient information."""
99
+ logger.info(f"Searching USDA for: {ingredient}")
100
+
101
+ try:
102
+ usda_api = "https://api.nal.usda.gov/fdc/v1"
103
+ usda_api_key = os.getenv("USDA_API_KEY", "DEMO_KEY") # Use DEMO_KEY if not provided
104
+
105
+ # Search for the ingredient
106
+ search_url = f"{usda_api}/foods/search"
107
+ params = {
108
+ "api_key": usda_api_key,
109
+ "query": ingredient,
110
+ "dataType": ["Foundation", "SR Legacy", "Branded"],
111
+ "pageSize": 5
112
+ }
113
+
114
+ response = requests.get(search_url, params=params, timeout=10)
115
+
116
+ if response.status_code == 200:
117
+ data = response.json()
118
+ if data.get("totalHits", 0) > 0:
119
+ return {
120
+ "source": "USDA FoodData Central",
121
+ "found": True,
122
+ "data": data
123
+ }
124
+
125
+ return {"source": "USDA FoodData Central", "found": False, "data": None}
126
+
127
+ except Exception as e:
128
+ logger.error(f"Error searching USDA: {e}")
129
+ return {"source": "USDA FoodData Central", "found": False, "error": str(e)}
130
+
131
async def async_search_pubchem(ingredient: str) -> Dict[str, Any]:
    """Asynchronously search PubChem for chemical information about the ingredient.

    Resolves the compound by name, then fetches computed properties and
    classification data for the first matching CID. Returns a dict with
    "source", "found" and either "data" or "error".
    """
    logger.info(f"Searching PubChem for: {ingredient}")

    try:
        # BUGFIX: the request paths below (/compound/name/..., /compound/cid/
        # .../property/...) and the "PC_Compounds" response key belong to the
        # PUG REST API ("rest/pug"). The previous base ("rest/pug_view/data")
        # is a different API that does not serve these paths, so every lookup
        # failed.
        # https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest#section=Input
        pubchem_api = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"

        async with aiohttp.ClientSession() as session:

            async def fetch_data(url: str, timeout: float = PUBCHEM_TIMEOUT, retry_count: int = 0):
                """GET `url` as JSON, retrying timeouts with exponential backoff."""
                try:
                    # aiohttp expects a ClientTimeout; passing a bare number
                    # is deprecated.
                    async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout)) as response:
                        if response.status == 200:
                            return await response.json()
                        logger.warning(f"PubChem returned status: {response.status} for URL: {url}")
                        return None
                except asyncio.TimeoutError:
                    if retry_count < PUBCHEM_MAX_RETRIES:
                        delay = (2 ** retry_count) * 5  # Exponential backoff
                        logger.warning(f"PubChem timeout for URL '{url}'. Retrying in {delay:.2f} seconds (attempt {retry_count + 1}/{PUBCHEM_MAX_RETRIES})")
                        await asyncio.sleep(delay)
                        return await fetch_data(url, timeout, retry_count + 1)  # Recursive retry
                    logger.error(f"Max retries reached for PubChem timeout on URL: {url}")
                    return None
                except Exception as e:
                    logger.error(f"PubChem error for URL '{url}': {e}")
                    return None

            # First try to get compound information by name
            data = await fetch_data(f"{pubchem_api}/compound/name/{ingredient}/JSON")

            if data and "PC_Compounds" in data:
                compound_id = data["PC_Compounds"][0]["id"]["id"]["cid"]

                # Get more detailed information using the CID
                property_url = f"{pubchem_api}/compound/cid/{compound_id}/property/MolecularFormula,MolecularWeight,IUPACName,InChI,InChIKey,CanonicalSMILES/JSON"
                properties_data = await fetch_data(property_url)

                # Get classifications and categories
                classification_url = f"{pubchem_api}/compound/cid/{compound_id}/classification/JSON"
                classification_data = await fetch_data(classification_url)

                return {
                    "source": "PubChem",
                    "found": True,
                    "data": {
                        "compound_info": data,
                        "properties": properties_data,
                        "classification": classification_data,
                    },
                }

            return {"source": "PubChem", "found": False, "data": None}

    except Exception as e:
        logger.error(f"Error searching PubChem: {e}")
        return {"source": "PubChem", "found": False, "error": str(e)}
192
+
193
+ @tool("search_pubchem")
194
+ def search_pubchem(ingredient: str) -> Dict[str, Any]:
195
+ """Search PubChem for chemical information about the ingredient."""
196
+ # Use asyncio.run to handle the async operation from synchronous code
197
+ try:
198
+ # For Python 3.7+
199
+ return asyncio.run(async_search_pubchem(ingredient))
200
+ except RuntimeError:
201
+ # If already in an event loop (e.g., in FastAPI)
202
+ loop = asyncio.get_event_loop()
203
+ return loop.run_until_complete(async_search_pubchem(ingredient))
204
+
205
+ @tool("search_wikipedia")
206
+ def search_wikipedia(ingredient: str) -> Dict[str, Any]:
207
+ """Search Wikipedia for ingredient information."""
208
+ logger.info(f"Searching Wikipedia for: {ingredient}")
209
+
210
+ try:
211
+ wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
212
+ wiki_result = wikipedia.run(ingredient)
213
+
214
+ if wiki_result and len(wiki_result) > 100: # Only count substantial results
215
+ return {
216
+ "source": "Wikipedia",
217
+ "found": True,
218
+ "data": wiki_result
219
+ }
220
+ else:
221
+ # Try with more specific searches
222
+ food_wiki = wikipedia.run(f"{ingredient} food additive")
223
+ if food_wiki and len(food_wiki) > 100:
224
+ return {
225
+ "source": "Wikipedia",
226
+ "found": True,
227
+ "data": food_wiki
228
+ }
229
+
230
+ chemical_wiki = wikipedia.run(f"{ingredient} chemical compound")
231
+ if chemical_wiki and len(chemical_wiki) > 100:
232
+ return {
233
+ "source": "Wikipedia",
234
+ "found": True,
235
+ "data": chemical_wiki
236
+ }
237
+
238
+ return {"source": "Wikipedia", "found": False, "data": None}
239
+
240
+ except Exception as e:
241
+ logger.error(f"Error searching Wikipedia: {e}")
242
+ return {"source": "Wikipedia", "found": False, "error": str(e)}
243
+
244
+ @tool("search_web")
245
+ def search_web(ingredient: str) -> Dict[str, Any]:
246
+ """Search web for ingredient information using DuckDuckGo."""
247
+ logger.info(f"Searching web for: {ingredient}")
248
+
249
+ try:
250
+ duckduckgo = DuckDuckGoSearchRun()
251
+ search_queries = [f"{ingredient} food ingredient safety", f"{ingredient} E-number food additive",f"{ingredient}'s allergic information",f"is {ingredient} vegan,vegetarian or Non-vegetarian"]
252
+ all_results = []
253
+ for query in search_queries:
254
+ time.sleep(DUCKDUCKGO_RATE_LIMIT_DELAY)
255
+ result = duckduckgo.run(query)
256
+ if result:
257
+ all_results.append({"query": query, "result": result})
258
+ return {"source": "DuckDuckGo", "found": bool(all_results), "data": all_results}
259
+ except Exception as e:
260
+ logger.error(f"Web search error: {e}")
261
+ return {"source": "DuckDuckGo", "found": False, "error": str(e)}