Lasdw committed on
Commit
85c9819
·
1 Parent(s): 9203ad7

Remove the use of Apify search

Browse files
Files changed (2) hide show
  1. agent.py +376 -245
  2. requirements.txt +2 -1
agent.py CHANGED
@@ -26,6 +26,7 @@ from apify_client import ApifyClient
26
  from langchain_community.document_loaders import WikipediaLoader
27
  from langchain_community.document_loaders import ArxivLoader
28
  from langchain_community.tools.tavily_search import TavilySearchResults # For Tavily search
 
29
 
30
  load_dotenv()
31
 
@@ -116,60 +117,60 @@ def run_python_code(code: str):
116
  return f"Error executing code: {str(e)}"
117
 
118
  # Apify-based search function
119
- def apify_google_search(query: str, limit: int = 10) -> str:
120
- """
121
- Use Apify's Google Search Results Scraper to get search results
122
-
123
- Args:
124
- query: The search query string
125
- limit: Number of results to return (10, 20, 30, 40, 50, 100)
126
-
127
- Returns:
128
- Formatted search results as a string
129
- """
130
- # You would need to provide a valid Apify API token
131
- # You can get one by signing up at https://apify.com/
132
- # Replace this with your actual Apify API token or set as environment variable
133
- APIFY_API_TOKEN = os.environ.get("APIFY_API_TOKEN", "")
134
-
135
- if not APIFY_API_TOKEN:
136
- print("No Apify API token found. Using fallback search method.")
137
- return fallback_search(query)
138
-
139
- try:
140
- # Initialize the ApifyClient with API token
141
- client = ApifyClient(APIFY_API_TOKEN)
142
-
143
- # Prepare the Actor input - convert limit to string as required by the API
144
- run_input = {
145
- "keyword": query,
146
- "limit": str(limit), # Convert to string as required by the API
147
- "country": "US"
148
- }
149
-
150
- # The Actor ID for the Google Search Results Scraper
151
- ACTOR_ID = "563JCPLOqM1kMmbbP"
152
-
153
- print(f"Starting Apify search for: '{query}'")
154
-
155
- # Run the Actor and wait for it to finish (with timeout)
156
- run = client.actor(ACTOR_ID).call(run_input=run_input, timeout_secs=60)
157
-
158
- if not run or not run.get("defaultDatasetId"):
159
- print("Failed to get results from Apify actor")
160
- return fallback_search(query)
161
-
162
- # Fetch Actor results from the run's dataset
163
- results = []
164
- for item in client.dataset(run["defaultDatasetId"]).iterate_items():
165
- results.append(item)
166
-
167
- # Format and return the results
168
- return format_search_results(results, query)
169
-
170
- except Exception as e:
171
- print(f"Error using Apify: {str(e)}")
172
- return fallback_search(query)
173
 
174
  def scrape_webpage(url: str) -> str:
175
  """
@@ -270,103 +271,105 @@ def scrape_webpage(url: str) -> str:
270
  except Exception as e:
271
  return f"Error scraping webpage {url}: {str(e)}"
272
 
273
- def format_search_results(results: List[Dict], query: str) -> str:
274
- """Format the search results into a readable string"""
275
- if not results or len(results) == 0:
276
- return f"No results found for query: {query}"
277
-
278
- print(f"Raw search results: {str(results)[:1000]}...")
279
-
280
- # Extract search results from the Apify output
281
- formatted_results = f"Search results for '{query}':\n\n"
282
-
283
- # Check if results is a list of dictionaries or a dictionary with nested results
284
- if isinstance(results, dict) and "results" in results:
285
- items = results["results"]
286
- elif isinstance(results, list):
287
- items = results
288
- else:
289
- return f"Unable to process results for query: {query}"
290
-
291
- # Handle different Apify result formats
292
- if len(items) > 0:
293
- # Check the structure of the first item to determine format
294
- first_item = items[0]
295
-
296
- # If item has 'organicResults', this is the format from some Apify actors
297
- if isinstance(first_item, dict) and "organicResults" in first_item:
298
- organic_results = first_item.get("organicResults", [])
299
- for i, result in enumerate(organic_results[:10], 1):
300
- if "title" in result and "url" in result:
301
- formatted_results += f"{i}. {result['title']}\n"
302
- formatted_results += f" URL: {result['url']}\n"
303
- if "snippet" in result:
304
- formatted_results += f" {result['snippet']}\n"
305
- formatted_results += "\n"
306
- else:
307
- # Standard format with title/url/description
308
- for i, result in enumerate(items[:10], 1):
309
- if "title" in result and "url" in result:
310
- formatted_results += f"{i}. {result['title']}\n"
311
- formatted_results += f" URL: {result['url']}\n"
312
- if "description" in result:
313
- formatted_results += f" {result['description']}\n"
314
- elif "snippet" in result:
315
- formatted_results += f" {result['snippet']}\n"
316
- formatted_results += "\n"
317
-
318
- return formatted_results
 
319
 
320
- def fallback_search(query: str) -> str:
321
- """Fallback search method using DuckDuckGo when Apify is not available"""
322
- try:
323
- search_tool = DuckDuckGoSearchRun()
324
- result = search_tool.invoke(query)
325
- return "Observation: " + result
326
- except Exception as e:
327
- return f"Search error: {str(e)}. Please try a different query or method."
 
328
 
329
- # Custom search function with improved error handling
330
- def safe_web_search(query: str) -> str:
331
- """Search the web safely with error handling and retry logic."""
332
- if not query:
333
- return "Error: No search query provided. Please specify what you want to search for."
334
-
335
- # Try using Apify first, if it fails it will use the fallback
336
- return "Observation: " + apify_google_search(query)
337
-
338
- # The code below is kept for reference but won't be executed
339
- max_retries = 3
340
- backoff_factor = 1.5
341
-
342
- for attempt in range(max_retries):
343
- try:
344
- # Use the DuckDuckGoSearchRun tool
345
- search_tool = DuckDuckGoSearchRun()
346
- result = search_tool.invoke(query)
347
-
348
- # If we get an empty result, provide a helpful message
349
- if not result or len(result.strip()) < 10:
350
- return f"The search for '{query}' did not return any useful results. Please try a more specific query or a different search engine."
351
-
352
- return "Observation: " + result
353
-
354
- except Exception as e:
355
- # If we're being rate limited
356
- if "Ratelimit" in str(e) or "429" in str(e):
357
- if attempt < max_retries - 1:
358
- wait_time = backoff_factor ** attempt
359
- print(f"Rate limited, waiting {wait_time:.2f} seconds before retrying...")
360
- time.sleep(wait_time)
361
- else:
362
- # On last attempt, return a helpful error
363
- error_msg = f"I'm currently unable to search for '{query}' due to service rate limits. "
364
- return error_msg
365
- else:
366
- # For other types of errors
367
- return f"Error while searching for '{query}': {str(e)}"
368
-
369
- return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
370
 
371
  def wikipedia_search(query: str, num_results: int = 3) -> str:
372
  """
@@ -535,25 +538,97 @@ def arxiv_search(query: str, max_results: int = 5) -> str:
535
  except Exception as e:
536
  return f"Error searching ArXiv: {str(e)}"
537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  # System prompt to guide the model's behavior
 
 
 
 
539
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
540
 
541
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
542
  wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
543
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
544
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
545
- web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
546
- webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
547
-
548
- IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
549
- 1. First reason about the problem in the "Thought" section
550
- 2. Then decide what action to take in the "Action" section (using the tools)
551
- 3. Wait for an observation from the tool
552
- 4. Based on the observation, continue with another thought
553
- 5. This cycle repeats until you have enough information to provide a final answer
554
-
555
- NEVER fake or simulate tool output yourself. You can try to use the tools multiple times if needed and try using multiple tools if needed.
556
- Give preference to using Tavily Search and Wikipedia Search before using web_search or webpage_scrape. When Web_search does not return a result, use Tavily Search.
557
 
558
  The way you use the tools is by specifying a json blob.
559
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
@@ -563,44 +638,48 @@ python_code: Execute Python code, args: {"code": {"type": "string"}}
563
  wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
564
  tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
565
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
566
- web_search: Search the web for current information, args: {"query": {"type": "string"}}
567
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
 
568
 
569
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
570
 
571
- example use:
572
 
 
573
  ```json
574
  {
575
- "action": "web_search",
576
- "action_input": {"query": "population of New York City"}
577
  }
578
  ```
579
 
580
- Or for scraping a webpage:
581
-
582
  ```json
583
  {
584
- "action": "webpage_scrape",
585
- "action_input": {"url": "https://en.wikipedia.org/wiki/Artificial_intelligence"}
586
  }
587
  ```
588
 
589
- Or for searching Wikipedia:
 
 
 
 
 
 
590
 
 
591
  ```json
592
  {
593
- "action": "wikipedia_search",
594
- "action_input": {"query": "quantum physics", "num_results": 3}
595
  }
596
  ```
597
 
598
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
599
-
600
  Question: [the user's question]
601
-
602
  Thought: [your reasoning about what to do next]
603
-
604
  Action:
605
  ```json
606
  {
@@ -608,11 +687,8 @@ Action:
608
  "action_input": {"[parameter_name]": "[parameter_value]"}
609
  }
610
  ```
611
-
612
  Observation: [the result from the tool will appear here]
613
-
614
  Thought: [your reasoning after seeing the observation]
615
-
616
  Action:
617
  ```json
618
  {
@@ -620,9 +696,7 @@ Action:
620
  "action_input": {"[parameter_name]": "[parameter_value]"}
621
  }
622
  ```
623
-
624
  Observation: [another tool result will appear here]
625
-
626
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
627
  1. First reason about the problem in the "Thought" section
628
  2. Then decide what action to take in the "Action" section (using the tools)
@@ -633,12 +707,9 @@ IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observ
633
  NEVER fake or simulate tool output yourself.
634
 
635
  ... (this Thought/Action/Observation cycle can repeat as needed) ...
636
-
637
  Thought: I now know the final answer
638
-
639
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
640
  Make sure to follow any formatting instructions given by the user.
641
-
642
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
643
 
644
  # Generate the chat interface, including the tools
@@ -650,21 +721,21 @@ llm = ChatOpenAI(
650
  chat = llm
651
  # Tools are defined but not bound to the LLM here
652
  tools_config = [
653
- {
654
- "name": "web_search",
655
- "description": "Search the web for current information. Provide a specific search query in the format: {\"query\": \"your search query here\"}",
656
- "func": safe_web_search
657
- },
658
  {
659
  "name": "python_code",
660
  "description": "Execute Python code. Provide the complete Python code as a string in the format: {\"code\": \"your python code here\"}",
661
  "func": run_python_code
662
  },
663
- {
664
- "name": "webpage_scrape",
665
- "description": "Scrape content from a specific webpage URL. Provide a valid URL in the format: {\"url\": \"https://example.com\"}",
666
- "func": scrape_webpage
667
- },
668
  {
669
  "name": "wikipedia_search",
670
  "description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
@@ -679,6 +750,11 @@ tools_config = [
679
  "name": "arxiv_search",
680
  "description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
681
  "func": arxiv_search
 
 
 
 
 
682
  }
683
  ]
684
 
@@ -813,51 +889,52 @@ def extract_json_from_text(text: str) -> dict:
813
  print(f"Error extracting JSON: {e}")
814
  return None
815
 
816
- def web_search_node(state: AgentState) -> Dict[str, Any]:
817
- """Node that executes the web search tool."""
818
- print("Web Search Tool Called...\n\n")
819
-
820
- # Extract tool arguments
821
- action_input = state.get("action_input", {})
822
- print(f"Web search action_input: {action_input}")
823
-
824
- # Try different ways to extract the query
825
- query = ""
826
- if isinstance(action_input, dict):
827
- query = action_input.get("query", "")
828
- elif isinstance(action_input, str):
829
- query = action_input
830
-
831
- print(f"Searching for: '{query}'")
832
-
833
- # Call the search function with retry logic
834
- result = safe_web_search(query)
835
- print(f"Search result: {result}") # Print the full result for debugging
836
-
837
- # Check if we hit rate limits and add a helpful note
838
- if "rate limit" in result.lower() or "ratelimit" in result.lower():
839
- result += "\n\nNote: You can use your internal knowledge to provide a response since the search is rate limited."
840
-
841
- # Format the observation to continue the ReAct cycle
842
- # Don't include "Observation:" as the assistant is stopped at this token
843
- observation = result
844
-
845
- # Create a tool message with the result
846
- tool_message = AIMessage(
847
- content=f"Observation: {observation}"
848
- )
849
-
850
- # Print the observation that will be sent back to the assistant
851
- print("\n=== TOOL OBSERVATION ===")
852
- print(tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content)
853
- print("=== END OBSERVATION ===\n")
854
-
855
- # Return the updated state
856
- return {
857
- "messages": state["messages"] + [tool_message],
858
- "current_tool": None, # Reset the current tool
859
- "action_input": None # Clear the action input
860
- }
 
861
 
862
  def python_code_node(state: AgentState) -> Dict[str, Any]:
863
  """Node that executes Python code."""
@@ -1102,6 +1179,55 @@ def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
1102
  "action_input": None # Clear the action input
1103
  }
1104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1105
  # Router function to direct to the correct tool
1106
  def router(state: AgentState) -> str:
1107
  """Route to the appropriate tool based on the current_tool field."""
@@ -1110,9 +1236,9 @@ def router(state: AgentState) -> str:
1110
  print(f"Routing to: {tool}")
1111
  print(f"Router received action_input: {action_input}")
1112
 
1113
- if tool == "web_search":
1114
- return "web_search"
1115
- elif tool == "python_code":
1116
  return "python_code"
1117
  elif tool == "webpage_scrape":
1118
  return "webpage_scrape"
@@ -1122,6 +1248,8 @@ def router(state: AgentState) -> str:
1122
  return "tavily_search"
1123
  elif tool == "arxiv_search":
1124
  return "arxiv_search"
 
 
1125
  else:
1126
  return "end"
1127
 
@@ -1132,12 +1260,13 @@ def create_agent_graph() -> StateGraph:
1132
 
1133
  # Define nodes: these do the work
1134
  builder.add_node("assistant", assistant)
1135
- builder.add_node("web_search", web_search_node)
1136
  builder.add_node("python_code", python_code_node)
1137
  builder.add_node("webpage_scrape", webpage_scrape_node)
1138
  builder.add_node("wikipedia_search", wikipedia_search_node)
1139
  builder.add_node("tavily_search", tavily_search_node)
1140
  builder.add_node("arxiv_search", arxiv_search_node)
 
1141
 
1142
  # Define edges: these determine how the control flow moves
1143
  builder.add_edge(START, "assistant")
@@ -1162,23 +1291,25 @@ def create_agent_graph() -> StateGraph:
1162
  "debug",
1163
  router,
1164
  {
1165
- "web_search": "web_search",
1166
  "python_code": "python_code",
1167
  "webpage_scrape": "webpage_scrape",
1168
  "wikipedia_search": "wikipedia_search",
1169
  "tavily_search": "tavily_search",
1170
  "arxiv_search": "arxiv_search",
 
1171
  "end": END
1172
  }
1173
  )
1174
 
1175
  # Tools always go back to assistant
1176
- builder.add_edge("web_search", "assistant")
1177
  builder.add_edge("python_code", "assistant")
1178
  builder.add_edge("webpage_scrape", "assistant")
1179
  builder.add_edge("wikipedia_search", "assistant")
1180
  builder.add_edge("tavily_search", "assistant")
1181
  builder.add_edge("arxiv_search", "assistant")
 
1182
 
1183
  # Compile the graph
1184
  return builder.compile()
@@ -1234,7 +1365,7 @@ class TurboNerd:
1234
  # Example usage:
1235
  if __name__ == "__main__":
1236
  agent = TurboNerd(max_execution_time=60)
1237
- response = agent("What is the last sentence of albert einstein's wikipedia page?")
1238
  print("\nFinal Response:")
1239
  print(response)
1240
 
 
26
  from langchain_community.document_loaders import WikipediaLoader
27
  from langchain_community.document_loaders import ArxivLoader
28
  from langchain_community.tools.tavily_search import TavilySearchResults # For Tavily search
29
+ from supabase import create_client, Client
30
 
31
  load_dotenv()
32
 
 
117
  return f"Error executing code: {str(e)}"
118
 
119
  # Apify-based search function
120
+ # def apify_google_search(query: str, limit: int = 10) -> str:
121
+ # """
122
+ # Use Apify's Google Search Results Scraper to get search results
123
+ #
124
+ # Args:
125
+ # query: The search query string
126
+ # limit: Number of results to return (10, 20, 30, 40, 50, 100)
127
+ #
128
+ # Returns:
129
+ # Formatted search results as a string
130
+ # """
131
+ # # You would need to provide a valid Apify API token
132
+ # # You can get one by signing up at https://apify.com/
133
+ # # Replace this with your actual Apify API token or set as environment variable
134
+ # APIFY_API_TOKEN = os.environ.get("APIFY_API_TOKEN", "")
135
+ #
136
+ # if not APIFY_API_TOKEN:
137
+ # print("No Apify API token found. Using fallback search method.")
138
+ # return fallback_search(query)
139
+ #
140
+ # try:
141
+ # # Initialize the ApifyClient with API token
142
+ # client = ApifyClient(APIFY_API_TOKEN)
143
+ #
144
+ # # Prepare the Actor input - convert limit to string as required by the API
145
+ # run_input = {
146
+ # "keyword": query,
147
+ # "limit": str(limit), # Convert to string as required by the API
148
+ # "country": "US"
149
+ # }
150
+ #
151
+ # # The Actor ID for the Google Search Results Scraper
152
+ # ACTOR_ID = "563JCPLOqM1kMmbbP"
153
+ #
154
+ # print(f"Starting Apify search for: '{query}'")
155
+ #
156
+ # # Run the Actor and wait for it to finish (with timeout)
157
+ # run = client.actor(ACTOR_ID).call(run_input=run_input, timeout_secs=60)
158
+ #
159
+ # if not run or not run.get("defaultDatasetId"):
160
+ # print("Failed to get results from Apify actor")
161
+ # return fallback_search(query)
162
+ #
163
+ # # Fetch Actor results from the run's dataset
164
+ # results = []
165
+ # for item in client.dataset(run["defaultDatasetId"]).iterate_items():
166
+ # results.append(item)
167
+ #
168
+ # # Format and return the results
169
+ # return format_search_results(results, query)
170
+ #
171
+ # except Exception as e:
172
+ # print(f"Error using Apify: {str(e)}")
173
+ # return fallback_search(query)
174
 
175
  def scrape_webpage(url: str) -> str:
176
  """
 
271
  except Exception as e:
272
  return f"Error scraping webpage {url}: {str(e)}"
273
 
274
+ # Comment out the format_search_results function (around line 180)
275
+ # def format_search_results(results: List[Dict], query: str) -> str:
276
+ # """Format the search results into a readable string"""
277
+ # if not results or len(results) == 0:
278
+ # return f"No results found for query: {query}"
279
+ #
280
+ # print(f"Raw search results: {str(results)[:1000]}...")
281
+ #
282
+ # # Extract search results from the Apify output
283
+ # formatted_results = f"Search results for '{query}':\n\n"
284
+ #
285
+ # # Check if results is a list of dictionaries or a dictionary with nested results
286
+ # if isinstance(results, dict) and "results" in results:
287
+ # items = results["results"]
288
+ # elif isinstance(results, list):
289
+ # items = results
290
+ # else:
291
+ # return f"Unable to process results for query: {query}"
292
+ #
293
+ # # Handle different Apify result formats
294
+ # if len(items) > 0:
295
+ # # Check the structure of the first item to determine format
296
+ # first_item = items[0]
297
+ #
298
+ # # If item has 'organicResults', this is the format from some Apify actors
299
+ # if isinstance(first_item, dict) and "organicResults" in first_item:
300
+ # organic_results = first_item.get("organicResults", [])
301
+ # for i, result in enumerate(organic_results[:10], 1):
302
+ # if "title" in result and "url" in result:
303
+ # formatted_results += f"{i}. {result['title']}\n"
304
+ # formatted_results += f" URL: {result['url']}\n"
305
+ # if "snippet" in result:
306
+ # formatted_results += f" {result['snippet']}\n"
307
+ # formatted_results += "\n"
308
+ # else:
309
+ # # Standard format with title/url/description
310
+ # for i, result in enumerate(items[:10], 1):
311
+ # if "title" in result and "url" in result:
312
+ # formatted_results += f"{i}. {result['title']}\n"
313
+ # formatted_results += f" URL: {result['url']}\n"
314
+ # if "description" in result:
315
+ # formatted_results += f" {result['description']}\n"
316
+ # elif "snippet" in result:
317
+ # formatted_results += f" {result['snippet']}\n"
318
+ # formatted_results += "\n"
319
+ #
320
+ # return formatted_results
321
 
322
+ # Comment out the fallback_search function (around line 220)
323
+ # def fallback_search(query: str) -> str:
324
+ # """Fallback search method using DuckDuckGo when Apify is not available"""
325
+ # try:
326
+ # search_tool = DuckDuckGoSearchRun()
327
+ # result = search_tool.invoke(query)
328
+ # return "Observation: " + result
329
+ # except Exception as e:
330
+ # return f"Search error: {str(e)}. Please try a different query or method."
331
 
332
+ # Comment out the safe_web_search function (around line 230)
333
+ # def safe_web_search(query: str) -> str:
334
+ # """Search the web safely with error handling and retry logic."""
335
+ # if not query:
336
+ # return "Error: No search query provided. Please specify what you want to search for."
337
+ #
338
+ # # Try using Apify first, if it fails it will use the fallback
339
+ # return "Observation: " + apify_google_search(query)
340
+ #
341
+ # # The code below is kept for reference but won't be executed
342
+ # max_retries = 3
343
+ # backoff_factor = 1.5
344
+ #
345
+ # for attempt in range(max_retries):
346
+ # try:
347
+ # # Use the DuckDuckGoSearchRun tool
348
+ # search_tool = DuckDuckGoSearchRun()
349
+ # result = search_tool.invoke(query)
350
+ #
351
+ # # If we get an empty result, provide a helpful message
352
+ # if not result or len(result.strip()) < 10:
353
+ # return f"The search for '{query}' did not return any useful results. Please try a more specific query or a different search engine."
354
+ #
355
+ # return "Observation: " + result
356
+ #
357
+ # except Exception as e:
358
+ # # If we're being rate limited
359
+ # if "Ratelimit" in str(e) or "429" in str(e):
360
+ # if attempt < max_retries - 1:
361
+ # wait_time = backoff_factor ** attempt
362
+ # print(f"Rate limited, waiting {wait_time:.2f} seconds before retrying...")
363
+ # time.sleep(wait_time)
364
+ # else:
365
+ # # On last attempt, return a helpful error
366
+ # error_msg = f"I'm currently unable to search for '{query}' due to service rate limits. "
367
+ # return error_msg
368
+ # else:
369
+ # # For other types of errors
370
+ # return f"Error while searching for '{query}': {str(e)}"
371
+ #
372
+ # return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
373
 
374
  def wikipedia_search(query: str, num_results: int = 3) -> str:
375
  """
 
538
  except Exception as e:
539
  return f"Error searching ArXiv: {str(e)}"
540
 
541
def supabase_operation(operation_type: str, table: str, data: dict = None, filters: dict = None) -> str:
    """
    Perform a CRUD operation against a Supabase table.

    Args:
        operation_type: One of 'insert', 'select', 'update', 'delete'.
        table: Name of the table to operate on.
        data: Row data for 'insert'/'update' operations.
        filters: Equality filters (e.g. {"id": 1}) for 'select'/'update'/'delete'.

    Returns:
        A human-readable result string. Error conditions are reported as
        "Error: ..." strings rather than raised, so the agent loop can
        surface them to the model as tool observations.
    """
    try:
        # Validate inputs BEFORE any network/client setup so malformed tool
        # calls fail fast with a message about the actual problem (and not,
        # e.g., a misleading missing-credentials error).
        if not table:
            return "Error: Table name is required."

        if operation_type not in ['insert', 'select', 'update', 'delete']:
            return "Error: Invalid operation type. Use 'insert', 'select', 'update', or 'delete'."

        # Credentials come from the environment; never hard-code them.
        supabase_url = os.environ.get("SUPABASE_URL")
        supabase_key = os.environ.get("SUPABASE_ANON_KEY")

        if not supabase_url or not supabase_key:
            return "Error: Supabase credentials not found. Please set SUPABASE_URL and SUPABASE_ANON_KEY environment variables."

        # Create the Supabase client only once inputs look sane.
        supabase: Client = create_client(supabase_url, supabase_key)

        if operation_type == 'insert':
            if not data:
                return "Error: Data is required for insert operation."
            result = supabase.table(table).insert(data).execute()
            return f"Insert successful: {len(result.data)} row(s) inserted into {table}"

        elif operation_type == 'select':
            query = supabase.table(table).select("*")
            # Apply simple equality filters if provided.
            if filters:
                for key, value in filters.items():
                    query = query.eq(key, value)
            result = query.execute()
            return f"Select successful: Found {len(result.data)} row(s) in {table}\nData: {json.dumps(result.data, indent=2)}"

        elif operation_type == 'update':
            # Require filters so an update can never silently touch every row.
            if not data or not filters:
                return "Error: Both data and filters are required for update operation."
            query = supabase.table(table).update(data)
            for key, value in filters.items():
                query = query.eq(key, value)
            result = query.execute()
            return f"Update successful: {len(result.data)} row(s) updated in {table}"

        else:  # 'delete' — filters are mandatory to avoid wiping a whole table
            if not filters:
                return "Error: Filters are required for delete operation."
            query = supabase.table(table).delete()
            for key, value in filters.items():
                query = query.eq(key, value)
            result = query.execute()
            # Report the row count for consistency with insert/update.
            return f"Delete successful: {len(result.data)} row(s) deleted from {table}"

    except Exception as e:
        # Surface any client/network failure as an observation string.
        return f"Error performing Supabase operation: {str(e)}"
620
  # System prompt to guide the model's behavior
621
+ #web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
622
+ #webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
623
+ #Give preference to using Tavily Search and Wikipedia Search before using web_search or webpage_scrape. When Web_search does not return a result, use Tavily Search.
624
+
625
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
626
 
627
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
628
  wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
629
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
630
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
631
+ supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
 
 
 
 
 
 
 
 
 
 
 
632
 
633
  The way you use the tools is by specifying a json blob.
634
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
 
638
  wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
639
  tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
640
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
 
641
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
642
+ supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
643
 
644
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
645
 
646
+ Example use for Supabase:
647
 
648
+ Insert data:
649
  ```json
650
  {
651
+ "action": "supabase_operation",
652
+ "action_input": {"operation_type": "insert", "table": "users", "data": {"name": "John Doe", "email": "john@example.com"}}
653
  }
654
  ```
655
 
656
+ Select data:
 
657
  ```json
658
  {
659
+ "action": "supabase_operation",
660
+ "action_input": {"operation_type": "select", "table": "users", "filters": {"id": 1}}
661
  }
662
  ```
663
 
664
+ Update data:
665
+ ```json
666
+ {
667
+ "action": "supabase_operation",
668
+ "action_input": {"operation_type": "update", "table": "users", "data": {"name": "Jane Doe"}, "filters": {"id": 1}}
669
+ }
670
+ ```
671
 
672
+ Delete data:
673
  ```json
674
  {
675
+ "action": "supabase_operation",
676
+ "action_input": {"operation_type": "delete", "table": "users", "filters": {"id": 1}}
677
  }
678
  ```
679
 
680
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
 
681
  Question: [the user's question]
 
682
  Thought: [your reasoning about what to do next]
 
683
  Action:
684
  ```json
685
  {
 
687
  "action_input": {"[parameter_name]": "[parameter_value]"}
688
  }
689
  ```
 
690
  Observation: [the result from the tool will appear here]
 
691
  Thought: [your reasoning after seeing the observation]
 
692
  Action:
693
  ```json
694
  {
 
696
  "action_input": {"[parameter_name]": "[parameter_value]"}
697
  }
698
  ```
 
699
  Observation: [another tool result will appear here]
 
700
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
701
  1. First reason about the problem in the "Thought" section
702
  2. Then decide what action to take in the "Action" section (using the tools)
 
707
  NEVER fake or simulate tool output yourself.
708
 
709
  ... (this Thought/Action/Observation cycle can repeat as needed) ...
 
710
  Thought: I now know the final answer
 
711
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
712
  Make sure to follow any formatting instructions given by the user.
 
713
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
714
 
715
  # Generate the chat interface, including the tools
 
721
  chat = llm
722
  # Tools are defined but not bound to the LLM here
723
  tools_config = [
724
+ # {
725
+ # "name": "web_search",
726
+ # "description": "Search the web for current information. Provide a specific search query in the format: {\"query\": \"your search query here\"}",
727
+ # "func": safe_web_search
728
+ # },
729
  {
730
  "name": "python_code",
731
  "description": "Execute Python code. Provide the complete Python code as a string in the format: {\"code\": \"your python code here\"}",
732
  "func": run_python_code
733
  },
734
+ # {
735
+ # "name": "webpage_scrape",
736
+ # "description": "Scrape content from a specific webpage URL. Provide a valid URL in the format: {\"url\": \"https://example.com\"}",
737
+ # "func": scrape_webpage
738
+ # },
739
  {
740
  "name": "wikipedia_search",
741
  "description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
 
750
  "name": "arxiv_search",
751
  "description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
752
  "func": arxiv_search
753
+ },
754
+ {
755
+ "name": "supabase_operation",
756
+ "description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
757
+ "func": supabase_operation
758
  }
759
  ]
760
 
 
889
  print(f"Error extracting JSON: {e}")
890
  return None
891
 
892
+ # Comment out the web_search_node function
893
+ # def web_search_node(state: AgentState) -> Dict[str, Any]:
894
+ # """Node that executes the web search tool."""
895
+ # print("Web Search Tool Called...\n\n")
896
+ #
897
+ # # Extract tool arguments
898
+ # action_input = state.get("action_input", {})
899
+ # print(f"Web search action_input: {action_input}")
900
+ #
901
+ # # Try different ways to extract the query
902
+ # query = ""
903
+ # if isinstance(action_input, dict):
904
+ # query = action_input.get("query", "")
905
+ # elif isinstance(action_input, str):
906
+ # query = action_input
907
+ #
908
+ # print(f"Searching for: '{query}'")
909
+ #
910
+ # # Call the search function with retry logic
911
+ # result = safe_web_search(query)
912
+ # print(f"Search result: {result}") # Print the full result for debugging
913
+ #
914
+ # # Check if we hit rate limits and add a helpful note
915
+ # if "rate limit" in result.lower() or "ratelimit" in result.lower():
916
+ # result += "\n\nNote: You can use your internal knowledge to provide a response since the search is rate limited."
917
+ #
918
+ # # Format the observation to continue the ReAct cycle
919
+ # # Don't include "Observation:" as the assistant is stopped at this token
920
+ # observation = result
921
+ #
922
+ # # Create a tool message with the result
923
+ # tool_message = AIMessage(
924
+ # content=f"Observation: {observation}"
925
+ # )
926
+ #
927
+ # # Print the observation that will be sent back to the assistant
928
+ # print("\n=== TOOL OBSERVATION ===")
929
+ # print(tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content)
930
+ # print("=== END OBSERVATION ===\n")
931
+ #
932
+ # # Return the updated state
933
+ # return {
934
+ # "messages": state["messages"] + [tool_message],
935
+ # "current_tool": None, # Reset the current tool
936
+ # "action_input": None # Clear the action input
937
+ # }
938
 
939
  def python_code_node(state: AgentState) -> Dict[str, Any]:
940
  """Node that executes Python code."""
 
1179
  "action_input": None # Clear the action input
1180
  }
1181
 
1182
def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
    """Node that processes Supabase database operations.

    Pulls `operation_type`, `table`, and the optional `data`/`filters` out of
    the state's action_input, delegates to `supabase_operation`, and appends
    the result to the conversation as an "Observation:" message so the ReAct
    loop can continue.
    """
    print("Supabase Operation Tool Called...\n\n")

    # The assistant's parsed JSON action arguments live in the state.
    payload = state.get("action_input", {})
    print(f"Supabase operation action_input: {payload}")

    # Defaults cover the case where the payload is not a dict at all.
    op_type, table_name = "", ""
    row_data = row_filters = None
    if isinstance(payload, dict):
        op_type = payload.get("operation_type", "")
        table_name = payload.get("table", "")
        row_data = payload.get("data")
        row_filters = payload.get("filters")

    print(f"Supabase operation: {op_type} on table {table_name}")

    # Both parameters are mandatory; short-circuit with a usage hint otherwise.
    if op_type and table_name:
        result = supabase_operation(op_type, table_name, row_data, row_filters)
    else:
        result = "Error: Both operation_type and table are required. operation_type should be one of: insert, select, update, delete"

    print(f"Supabase operation result length: {len(result)}")

    # Wrap the tool output as an observation for the assistant's next turn.
    tool_message = AIMessage(content=f"Observation: {result.strip()}")

    # Print (a preview of) the observation that will be sent back to the assistant.
    print("\n=== TOOL OBSERVATION ===")
    if len(tool_message.content) > 500:
        print(tool_message.content[:500] + "...")
    else:
        print(tool_message.content)
    print("=== END OBSERVATION ===\n")

    # Hand control back to the assistant with the tool slots cleared.
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # Reset the current tool
        "action_input": None,  # Clear the action input
    }
1231
  # Router function to direct to the correct tool
1232
  def router(state: AgentState) -> str:
1233
  """Route to the appropriate tool based on the current_tool field."""
 
1236
  print(f"Routing to: {tool}")
1237
  print(f"Router received action_input: {action_input}")
1238
 
1239
+ # if tool == "web_search":
1240
+ # return "web_search"
1241
+ if tool == "python_code":
1242
  return "python_code"
1243
  elif tool == "webpage_scrape":
1244
  return "webpage_scrape"
 
1248
  return "tavily_search"
1249
  elif tool == "arxiv_search":
1250
  return "arxiv_search"
1251
+ elif tool == "supabase_operation":
1252
+ return "supabase_operation"
1253
  else:
1254
  return "end"
1255
 
 
1260
 
1261
  # Define nodes: these do the work
1262
  builder.add_node("assistant", assistant)
1263
+ # builder.add_node("web_search", web_search_node)
1264
  builder.add_node("python_code", python_code_node)
1265
  builder.add_node("webpage_scrape", webpage_scrape_node)
1266
  builder.add_node("wikipedia_search", wikipedia_search_node)
1267
  builder.add_node("tavily_search", tavily_search_node)
1268
  builder.add_node("arxiv_search", arxiv_search_node)
1269
+ builder.add_node("supabase_operation", supabase_operation_node)
1270
 
1271
  # Define edges: these determine how the control flow moves
1272
  builder.add_edge(START, "assistant")
 
1291
  "debug",
1292
  router,
1293
  {
1294
+ # "web_search": "web_search",
1295
  "python_code": "python_code",
1296
  "webpage_scrape": "webpage_scrape",
1297
  "wikipedia_search": "wikipedia_search",
1298
  "tavily_search": "tavily_search",
1299
  "arxiv_search": "arxiv_search",
1300
+ "supabase_operation": "supabase_operation",
1301
  "end": END
1302
  }
1303
  )
1304
 
1305
  # Tools always go back to assistant
1306
+ # builder.add_edge("web_search", "assistant")
1307
  builder.add_edge("python_code", "assistant")
1308
  builder.add_edge("webpage_scrape", "assistant")
1309
  builder.add_edge("wikipedia_search", "assistant")
1310
  builder.add_edge("tavily_search", "assistant")
1311
  builder.add_edge("arxiv_search", "assistant")
1312
+ builder.add_edge("supabase_operation", "assistant")
1313
 
1314
  # Compile the graph
1315
  return builder.compile()
 
1365
# Example usage:
if __name__ == "__main__":
    # Build the agent with a 60-second execution budget and run one question.
    bot = TurboNerd(max_execution_time=60)
    answer = bot("When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?")
    print("\nFinal Response:")
    print(answer)
1371
 
requirements.txt CHANGED
@@ -7,4 +7,5 @@ duckduckgo-search
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
- html2text
 
 
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
+ html2text
11
+ supabase