Lasdw committed on
Commit
5a47613
·
1 Parent(s): 4c7c07c

Added more tools

Browse files
Files changed (2) hide show
  1. agent.py +339 -123
  2. requirements.txt +1 -5
agent.py CHANGED
@@ -21,102 +21,14 @@ from urllib.parse import quote, urlparse
21
  import sys
22
  from bs4 import BeautifulSoup
23
  import html2text
24
- import base64
25
- from io import BytesIO
26
- from PIL import Image
27
- from typing import Optional, Union
28
- import torch
29
- from transformers import AutoProcessor, AutoModelForVision2Seq
30
 
31
  from apify_client import ApifyClient
 
 
 
32
 
33
  load_dotenv()
34
 
35
- # Initialize vision model and processor globally for reuse
36
- # This avoids reloading the model on each call
37
- try:
38
- # Load model and processor once
39
- vision_processor = AutoProcessor.from_pretrained("google/paligemma-3b-mix-224")
40
- vision_model = AutoModelForVision2Seq.from_pretrained("google/paligemma-3b-mix-224", torch_dtype=torch.float16)
41
-
42
- # Move model to GPU if available
43
- if torch.cuda.is_available():
44
- vision_model = vision_model.to("cuda")
45
-
46
- VISION_MODEL_LOADED = True
47
- print("Vision model loaded successfully")
48
- except Exception as e:
49
- VISION_MODEL_LOADED = False
50
- print(f"Error loading vision model: {e}")
51
-
52
- def describe_image(image_url: str) -> str:
53
- """
54
- Process an image from a URL and generate a description using a vision model.
55
-
56
- Args:
57
- image_url: URL of the image to describe
58
-
59
- Returns:
60
- Text description of the image
61
- """
62
- if not VISION_MODEL_LOADED:
63
- return "Error: Vision model is not available. Could not load the model."
64
-
65
- try:
66
- # Check if URL is valid
67
- if not image_url or not isinstance(image_url, str):
68
- return "Error: Invalid image URL. Please provide a valid URL."
69
-
70
- # Standardize prompt for the vision model
71
- prompt = "Describe this image in detail."
72
-
73
- # Download the image
74
- print(f"Downloading image from: {image_url}")
75
- response = requests.get(image_url, timeout=10)
76
-
77
- if response.status_code != 200:
78
- return f"Error: Failed to download image (Status code: {response.status_code})"
79
-
80
- # Open the image from bytes
81
- image = Image.open(BytesIO(response.content))
82
-
83
- # Ensure the image is in RGB format (some images might be in RGBA or other formats)
84
- if image.mode != "RGB":
85
- image = image.convert("RGB")
86
-
87
- # Process image and text inputs
88
- inputs = vision_processor(text=prompt, images=image, return_tensors="pt")
89
-
90
- # Move inputs to the same device as the model
91
- if torch.cuda.is_available():
92
- inputs = {name: tensor.to("cuda") for name, tensor in inputs.items()}
93
-
94
- # Generate description
95
- with torch.no_grad():
96
- output = vision_model.generate(
97
- **inputs,
98
- max_new_tokens=256,
99
- temperature=0.1,
100
- do_sample=True
101
- )
102
-
103
- # Decode the output
104
- generated_text = vision_processor.decode(output[0], skip_special_tokens=True)
105
-
106
- # Trim any leading/trailing whitespace and remove the prompt if it's included in the output
107
- description = generated_text.strip()
108
- if description.startswith(prompt):
109
- description = description[len(prompt):].strip()
110
-
111
- return f"Image description: {description}"
112
-
113
- except requests.exceptions.Timeout:
114
- return f"Error: Request timed out while trying to download the image."
115
- except requests.exceptions.ConnectionError:
116
- return f"Error: Failed to connect to the image URL. The site might be down or the URL might be incorrect."
117
- except Exception as e:
118
- return f"Error processing image: {str(e)}"
119
-
120
  def run_python_code(code: str):
121
  """Execute Python code in a temporary file and return the output."""
122
  # Check for potentially dangerous operations
@@ -258,6 +170,7 @@ def apify_google_search(query: str, limit: int = 10) -> str:
258
  except Exception as e:
259
  print(f"Error using Apify: {str(e)}")
260
  return fallback_search(query)
 
261
  def scrape_webpage(url: str) -> str:
262
  """
263
  Safely scrape content from a specified URL.
@@ -356,6 +269,7 @@ def scrape_webpage(url: str) -> str:
356
  return f"Error requesting {url}: {str(e)}"
357
  except Exception as e:
358
  return f"Error scraping webpage {url}: {str(e)}"
 
359
  def format_search_results(results: List[Dict], query: str) -> str:
360
  """Format the search results into a readable string"""
361
  if not results or len(results) == 0:
@@ -454,13 +368,182 @@ def safe_web_search(query: str) -> str:
454
 
455
  return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  # System prompt to guide the model's behavior
458
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
459
 
460
- web_search: Search the web for current information. Provide a specific search query.
461
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
462
  webpage_scrape: Scrape content from a specific webpage URL. Provide a valid URL to extract information from a particular web page.
463
- describe_image: Analyze and describe an image from a URL. Provide a valid image URL to get a detailed description.
 
 
464
 
465
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
466
  1. First reason about the problem in the "Thought" section
@@ -478,7 +561,9 @@ The only values that should be in the "action" field are:
478
  web_search: Search the web for current information, args: {"query": {"type": "string"}}
479
  python_code: Execute Python code, args: {"code": {"type": "string"}}
480
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
481
- describe_image: Analyze an image, args: {"image_url": {"type": "string"}}
 
 
482
 
483
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
484
 
@@ -500,12 +585,12 @@ Or for scraping a webpage:
500
  }
501
  ```
502
 
503
- Or for describing an image:
504
 
505
  ```json
506
  {
507
- "action": "describe_image",
508
- "action_input": {"image_url": "https://example.com/image.jpg"}
509
  }
510
  ```
511
 
@@ -553,7 +638,6 @@ Thought: I now know the final answer
553
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
554
 
555
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
556
- #Your response will be evaluated for accuracy and completeness. After you provide an answer, an evaluator will check your work and may ask you to improve it. The evaluation process has a maximum of 3 attempts.
557
 
558
  # Generate the chat interface, including the tools
559
  llm = ChatOpenAI(
@@ -580,9 +664,19 @@ tools_config = [
580
  "func": scrape_webpage
581
  },
582
  {
583
- "name": "describe_image",
584
- "description": "Analyze and describe an image from a URL. Provide a valid image URL in the format: {\"image_url\": \"https://example.com/image.jpg\"}",
585
- "func": describe_image
 
 
 
 
 
 
 
 
 
 
586
  }
587
  ]
588
 
@@ -596,6 +690,9 @@ class ActionInput(TypedDict, total=False):
596
  code: Optional[str]
597
  url: Optional[str]
598
  image_url: Optional[str]
 
 
 
599
 
600
  class AgentState(TypedDict, total=False):
601
  messages: Annotated[list[AnyMessage], add_messages]
@@ -857,34 +954,143 @@ def webpage_scrape_node(state: AgentState) -> Dict[str, Any]:
857
  "action_input": None # Clear the action input
858
  }
859
 
860
- def describe_image_node(state: AgentState) -> Dict[str, Any]:
861
- """Node that processes image description requests."""
862
- print("Image Description Tool Called...\n\n")
863
 
864
  # Extract tool arguments
865
  action_input = state.get("action_input", {})
866
- print(f"Image description action_input: {action_input}")
 
 
 
 
867
 
868
- # Try different ways to extract the image URL
869
- image_url = ""
870
  if isinstance(action_input, dict):
871
- image_url = action_input.get("image_url", "")
 
 
 
 
 
872
  elif isinstance(action_input, str):
873
- image_url = action_input
874
 
875
- print(f"Processing image URL: '{image_url}'")
876
 
877
- # Safety check - don't run with empty URL
878
- if not image_url:
879
- result = "Error: No image URL provided. Please provide a valid image URL to describe."
880
  else:
881
- # Call the image description function
882
- result = describe_image(image_url)
883
 
884
- print(f"Image description result length: {len(result)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
 
886
  # Format the observation to continue the ReAct cycle
887
- # Always prefix with "Observation:" for consistency in the ReAct cycle
888
  tool_message = AIMessage(
889
  content=f"Observation: {result.strip()}"
890
  )
@@ -916,8 +1122,12 @@ def router(state: AgentState) -> str:
916
  return "python_code"
917
  elif tool == "webpage_scrape":
918
  return "webpage_scrape"
919
- elif tool == "describe_image":
920
- return "describe_image"
 
 
 
 
921
  else:
922
  return "end"
923
 
@@ -931,7 +1141,9 @@ def create_agent_graph() -> StateGraph:
931
  builder.add_node("web_search", web_search_node)
932
  builder.add_node("python_code", python_code_node)
933
  builder.add_node("webpage_scrape", webpage_scrape_node)
934
- builder.add_node("describe_image", describe_image_node)
 
 
935
 
936
  # Define edges: these determine how the control flow moves
937
  builder.add_edge(START, "assistant")
@@ -959,7 +1171,9 @@ def create_agent_graph() -> StateGraph:
959
  "web_search": "web_search",
960
  "python_code": "python_code",
961
  "webpage_scrape": "webpage_scrape",
962
- "describe_image": "describe_image",
 
 
963
  "end": END
964
  }
965
  )
@@ -968,7 +1182,9 @@ def create_agent_graph() -> StateGraph:
968
  builder.add_edge("web_search", "assistant")
969
  builder.add_edge("python_code", "assistant")
970
  builder.add_edge("webpage_scrape", "assistant")
971
- builder.add_edge("describe_image", "assistant")
 
 
972
 
973
  # Compile with a reasonable recursion limit to prevent infinite loops
974
  return builder.compile()
@@ -1024,7 +1240,7 @@ class TurboNerd:
1024
  # Example usage:
1025
  if __name__ == "__main__":
1026
  agent = TurboNerd(max_execution_time=60)
1027
- response = agent("Looking at image.png which shows a chess position, what is the best move for white? Please analyze the position and suggest the strongest continuation.")
1028
  print("\nFinal Response:")
1029
  print(response)
1030
 
 
21
  import sys
22
  from bs4 import BeautifulSoup
23
  import html2text
 
 
 
 
 
 
24
 
25
  from apify_client import ApifyClient
26
+ from langchain_community.document_loaders import WikipediaLoader
27
+ from langchain_community.document_loaders import ArxivLoader
28
+ from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper # For Tavily search
29
 
30
  load_dotenv()
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def run_python_code(code: str):
33
  """Execute Python code in a temporary file and return the output."""
34
  # Check for potentially dangerous operations
 
170
  except Exception as e:
171
  print(f"Error using Apify: {str(e)}")
172
  return fallback_search(query)
173
+
174
  def scrape_webpage(url: str) -> str:
175
  """
176
  Safely scrape content from a specified URL.
 
269
  return f"Error requesting {url}: {str(e)}"
270
  except Exception as e:
271
  return f"Error scraping webpage {url}: {str(e)}"
272
+
273
  def format_search_results(results: List[Dict], query: str) -> str:
274
  """Format the search results into a readable string"""
275
  if not results or len(results) == 0:
 
368
 
369
  return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
370
 
371
+ def wikipedia_search(query: str, num_results: int = 3) -> str:
372
+ """
373
+ Search Wikipedia for information about a specific query.
374
+
375
+ Args:
376
+ query: Search query
377
+ num_results: Number of search results to return (default: 3)
378
+
379
+ Returns:
380
+ Formatted Wikipedia search results
381
+ """
382
+ try:
383
+ # Validate input
384
+ if not query or not isinstance(query, str):
385
+ return "Error: Please provide a valid search query."
386
+
387
+ # Ensure num_results is valid
388
+ try:
389
+ num_results = int(num_results)
390
+ if num_results <= 0:
391
+ num_results = 3 # Default to 3 if invalid
392
+ except:
393
+ num_results = 3 # Default to 3 if conversion fails
394
+
395
+ print(f"Searching Wikipedia for: {query}")
396
+
397
+ # Use WikipediaLoader from LangChain
398
+ loader = WikipediaLoader(query=query, load_max_docs=num_results)
399
+ docs = loader.load()
400
+
401
+ if not docs:
402
+ return f"No Wikipedia results found for '{query}'. Try refining your search."
403
+
404
+ # Format the results
405
+ formatted_results = f"Wikipedia search results for '{query}':\n\n"
406
+
407
+ for i, doc in enumerate(docs, 1):
408
+ title = doc.metadata.get('title', 'Unknown Title')
409
+ source = doc.metadata.get('source', 'No URL')
410
+ content = doc.page_content
411
+
412
+ # Truncate content if too long
413
+ if len(content) > 500:
414
+ content = content[:500] + "..."
415
+
416
+ formatted_results += f"{i}. {title}\n"
417
+ formatted_results += f" URL: {source}\n"
418
+ formatted_results += f" {content}\n\n"
419
+
420
+ return formatted_results
421
+
422
+ except Exception as e:
423
+ return f"Error searching Wikipedia: {str(e)}"
424
+
425
+ def tavily_search(query: str, search_depth: str = "basic") -> str:
426
+ """
427
+ Search the web using the Tavily Search API.
428
+
429
+ Args:
430
+ query: Search query
431
+ search_depth: Depth of search ('basic' or 'comprehensive')
432
+
433
+ Returns:
434
+ Formatted search results from Tavily
435
+ """
436
+ try:
437
+ # Check for API key
438
+ tavily_api_key = os.environ.get("TAVILY_API_KEY")
439
+ if not tavily_api_key:
440
+ return "Error: Tavily API key not found. Please set the TAVILY_API_KEY environment variable."
441
+
442
+ # Validate input
443
+ if not query or not isinstance(query, str):
444
+ return "Error: Please provide a valid search query."
445
+
446
+ # Validate search_depth
447
+ if search_depth not in ["basic", "comprehensive"]:
448
+ search_depth = "basic" # Default to basic if invalid
449
+
450
+ print(f"Searching Tavily for: {query} (depth: {search_depth})")
451
+
452
+ # Initialize the Tavily search wrapper
453
+ search = TavilySearchAPIWrapper()
454
+
455
+ # Execute the search
456
+ results = search.results(query, search_depth=search_depth)
457
+
458
+ if not results:
459
+ return f"No Tavily search results found for '{query}'. Try refining your search."
460
+
461
+ # Format the results
462
+ formatted_results = f"Tavily search results for '{query}':\n\n"
463
+
464
+ for i, result in enumerate(results, 1):
465
+ formatted_results += f"{i}. {result.get('title', 'No title')}\n"
466
+ formatted_results += f" URL: {result.get('url', 'No URL')}\n"
467
+ formatted_results += f" {result.get('content', 'No content')}\n\n"
468
+
469
+ return formatted_results
470
+
471
+ except Exception as e:
472
+ return f"Error searching with Tavily: {str(e)}"
473
+
474
+ def arxiv_search(query: str, max_results: int = 5) -> str:
475
+ """
476
+ Search ArXiv for scientific papers matching the query.
477
+
478
+ Args:
479
+ query: Search query for ArXiv
480
+ max_results: Maximum number of results to return
481
+
482
+ Returns:
483
+ Formatted ArXiv search results
484
+ """
485
+ try:
486
+ # Validate input
487
+ if not query or not isinstance(query, str):
488
+ return "Error: Please provide a valid search query."
489
+
490
+ # Ensure max_results is valid
491
+ try:
492
+ max_results = int(max_results)
493
+ if max_results <= 0 or max_results > 10:
494
+ max_results = 5 # Default to 5 if invalid or too large
495
+ except:
496
+ max_results = 5 # Default to 5 if conversion fails
497
+
498
+ print(f"Searching ArXiv for: {query}")
499
+
500
+ # Use ArxivLoader from LangChain
501
+ loader = ArxivLoader(
502
+ query=query,
503
+ load_max_docs=max_results,
504
+ load_all_available_meta=True
505
+ )
506
+
507
+ docs = loader.load()
508
+
509
+ if not docs:
510
+ return f"No ArXiv papers found for '{query}'. Try refining your search."
511
+
512
+ # Format the results
513
+ formatted_results = f"ArXiv papers for '{query}':\n\n"
514
+
515
+ for i, doc in enumerate(docs, 1):
516
+ meta = doc.metadata
517
+ title = meta.get('Title', 'Unknown Title')
518
+ url = meta.get('Entry ID', 'No URL')
519
+ authors = meta.get('Authors', 'Unknown Authors')
520
+ published = meta.get('Published', 'Unknown Date')
521
+
522
+ formatted_results += f"{i}. {title}\n"
523
+ formatted_results += f" URL: {url}\n"
524
+ formatted_results += f" Authors: {authors}\n"
525
+ formatted_results += f" Published: {published}\n"
526
+
527
+ # Add abstract, truncated if too long
528
+ abstract = doc.page_content.replace('\n', ' ')
529
+ if len(abstract) > 300:
530
+ abstract = abstract[:300] + "..."
531
+ formatted_results += f" Abstract: {abstract}\n\n"
532
+
533
+ return formatted_results
534
+
535
+ except Exception as e:
536
+ return f"Error searching ArXiv: {str(e)}"
537
+
538
  # System prompt to guide the model's behavior
539
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
540
 
541
+ web_search: Search the google search engine for current information. Provide a specific search query.
542
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
543
  webpage_scrape: Scrape content from a specific webpage URL. Provide a valid URL to extract information from a particular web page.
544
+ wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
545
+ tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
546
+ arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
547
 
548
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
549
  1. First reason about the problem in the "Thought" section
 
561
  web_search: Search the web for current information, args: {"query": {"type": "string"}}
562
  python_code: Execute Python code, args: {"code": {"type": "string"}}
563
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
564
+ wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
565
+ tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
566
+ arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
567
 
568
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
569
 
 
585
  }
586
  ```
587
 
588
+ Or for searching Wikipedia:
589
 
590
  ```json
591
  {
592
+ "action": "wikipedia_search",
593
+ "action_input": {"query": "quantum physics", "num_results": 3}
594
  }
595
  ```
596
 
 
638
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
639
 
640
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
 
641
 
642
  # Generate the chat interface, including the tools
643
  llm = ChatOpenAI(
 
664
  "func": scrape_webpage
665
  },
666
  {
667
+ "name": "wikipedia_search",
668
+ "description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
669
+ "func": wikipedia_search
670
+ },
671
+ {
672
+ "name": "tavily_search",
673
+ "description": "Search the web using Tavily for more comprehensive results. Provide a query in the format: {\"query\": \"your search query\", \"search_depth\": \"basic\"}",
674
+ "func": tavily_search
675
+ },
676
+ {
677
+ "name": "arxiv_search",
678
+ "description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
679
+ "func": arxiv_search
680
  }
681
  ]
682
 
 
690
  code: Optional[str]
691
  url: Optional[str]
692
  image_url: Optional[str]
693
+ num_results: Optional[int]
694
+ search_depth: Optional[str]
695
+ max_results: Optional[int]
696
 
697
  class AgentState(TypedDict, total=False):
698
  messages: Annotated[list[AnyMessage], add_messages]
 
954
  "action_input": None # Clear the action input
955
  }
956
 
957
+ def wikipedia_search_node(state: AgentState) -> Dict[str, Any]:
958
+ """Node that processes Wikipedia search requests."""
959
+ print("Wikipedia Search Tool Called...\n\n")
960
 
961
  # Extract tool arguments
962
  action_input = state.get("action_input", {})
963
+ print(f"Wikipedia search action_input: {action_input}")
964
+
965
+ # Extract query and num_results
966
+ query = ""
967
+ num_results = 3 # Default
968
 
 
 
969
  if isinstance(action_input, dict):
970
+ query = action_input.get("query", "")
971
+ if "num_results" in action_input:
972
+ try:
973
+ num_results = int(action_input["num_results"])
974
+ except:
975
+ print("Invalid num_results, using default")
976
  elif isinstance(action_input, str):
977
+ query = action_input
978
 
979
+ print(f"Searching Wikipedia for: '{query}' (max results: {num_results})")
980
 
981
+ # Safety check - don't run with empty query
982
+ if not query:
983
+ result = "Error: No search query provided. Please provide a valid query for Wikipedia search."
984
  else:
985
+ # Call the Wikipedia search function
986
+ result = wikipedia_search(query, num_results)
987
 
988
+ print(f"Wikipedia search result length: {len(result)}")
989
+
990
+ # Format the observation to continue the ReAct cycle
991
+ tool_message = AIMessage(
992
+ content=f"Observation: {result.strip()}"
993
+ )
994
+
995
+ # Print the observation that will be sent back to the assistant
996
+ print("\n=== TOOL OBSERVATION ===")
997
+ content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
998
+ print(content_preview)
999
+ print("=== END OBSERVATION ===\n")
1000
+
1001
+ # Return the updated state
1002
+ return {
1003
+ "messages": state["messages"] + [tool_message],
1004
+ "current_tool": None, # Reset the current tool
1005
+ "action_input": None # Clear the action input
1006
+ }
1007
+
1008
+ def tavily_search_node(state: AgentState) -> Dict[str, Any]:
1009
+ """Node that processes Tavily search requests."""
1010
+ print("Tavily Search Tool Called...\n\n")
1011
+
1012
+ # Extract tool arguments
1013
+ action_input = state.get("action_input", {})
1014
+ print(f"Tavily search action_input: {action_input}")
1015
+
1016
+ # Extract query and search_depth
1017
+ query = ""
1018
+ search_depth = "basic" # Default
1019
+
1020
+ if isinstance(action_input, dict):
1021
+ query = action_input.get("query", "")
1022
+ if "search_depth" in action_input:
1023
+ depth = action_input["search_depth"]
1024
+ if depth in ["basic", "comprehensive"]:
1025
+ search_depth = depth
1026
+ elif isinstance(action_input, str):
1027
+ query = action_input
1028
+
1029
+ print(f"Searching Tavily for: '{query}' (depth: {search_depth})")
1030
+
1031
+ # Safety check - don't run with empty query
1032
+ if not query:
1033
+ result = "Error: No search query provided. Please provide a valid query for Tavily search."
1034
+ else:
1035
+ # Call the Tavily search function
1036
+ result = tavily_search(query, search_depth)
1037
+
1038
+ print(f"Tavily search result length: {len(result)}")
1039
+
1040
+ # Format the observation to continue the ReAct cycle
1041
+ tool_message = AIMessage(
1042
+ content=f"Observation: {result.strip()}"
1043
+ )
1044
+
1045
+ # Print the observation that will be sent back to the assistant
1046
+ print("\n=== TOOL OBSERVATION ===")
1047
+ content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1048
+ print(content_preview)
1049
+ print("=== END OBSERVATION ===\n")
1050
+
1051
+ # Return the updated state
1052
+ return {
1053
+ "messages": state["messages"] + [tool_message],
1054
+ "current_tool": None, # Reset the current tool
1055
+ "action_input": None # Clear the action input
1056
+ }
1057
+
1058
+ def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
1059
+ """Node that processes ArXiv search requests."""
1060
+ print("ArXiv Search Tool Called...\n\n")
1061
+
1062
+ # Extract tool arguments
1063
+ action_input = state.get("action_input", {})
1064
+ print(f"ArXiv search action_input: {action_input}")
1065
+
1066
+ # Extract query and max_results
1067
+ query = ""
1068
+ max_results = 5 # Default
1069
+
1070
+ if isinstance(action_input, dict):
1071
+ query = action_input.get("query", "")
1072
+ if "max_results" in action_input:
1073
+ try:
1074
+ max_results = int(action_input["max_results"])
1075
+ if max_results <= 0 or max_results > 10:
1076
+ max_results = 5 # Reset to default if out of range
1077
+ except:
1078
+ print("Invalid max_results, using default")
1079
+ elif isinstance(action_input, str):
1080
+ query = action_input
1081
+
1082
+ print(f"Searching ArXiv for: '{query}' (max results: {max_results})")
1083
+
1084
+ # Safety check - don't run with empty query
1085
+ if not query:
1086
+ result = "Error: No search query provided. Please provide a valid query for ArXiv search."
1087
+ else:
1088
+ # Call the ArXiv search function
1089
+ result = arxiv_search(query, max_results)
1090
+
1091
+ print(f"ArXiv search result length: {len(result)}")
1092
 
1093
  # Format the observation to continue the ReAct cycle
 
1094
  tool_message = AIMessage(
1095
  content=f"Observation: {result.strip()}"
1096
  )
 
1122
  return "python_code"
1123
  elif tool == "webpage_scrape":
1124
  return "webpage_scrape"
1125
+ elif tool == "wikipedia_search":
1126
+ return "wikipedia_search"
1127
+ elif tool == "tavily_search":
1128
+ return "tavily_search"
1129
+ elif tool == "arxiv_search":
1130
+ return "arxiv_search"
1131
  else:
1132
  return "end"
1133
 
 
1141
  builder.add_node("web_search", web_search_node)
1142
  builder.add_node("python_code", python_code_node)
1143
  builder.add_node("webpage_scrape", webpage_scrape_node)
1144
+ builder.add_node("wikipedia_search", wikipedia_search_node)
1145
+ builder.add_node("tavily_search", tavily_search_node)
1146
+ builder.add_node("arxiv_search", arxiv_search_node)
1147
 
1148
  # Define edges: these determine how the control flow moves
1149
  builder.add_edge(START, "assistant")
 
1171
  "web_search": "web_search",
1172
  "python_code": "python_code",
1173
  "webpage_scrape": "webpage_scrape",
1174
+ "wikipedia_search": "wikipedia_search",
1175
+ "tavily_search": "tavily_search",
1176
+ "arxiv_search": "arxiv_search",
1177
  "end": END
1178
  }
1179
  )
 
1182
  builder.add_edge("web_search", "assistant")
1183
  builder.add_edge("python_code", "assistant")
1184
  builder.add_edge("webpage_scrape", "assistant")
1185
+ builder.add_edge("wikipedia_search", "assistant")
1186
+ builder.add_edge("tavily_search", "assistant")
1187
+ builder.add_edge("arxiv_search", "assistant")
1188
 
1189
  # Compile with a reasonable recursion limit to prevent infinite loops
1190
  return builder.compile()
 
1240
  # Example usage:
1241
  if __name__ == "__main__":
1242
  agent = TurboNerd(max_execution_time=60)
1243
+ response = agent("When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect? Use Tavily Search")
1244
  print("\nFinal Response:")
1245
  print(response)
1246
 
requirements.txt CHANGED
@@ -7,8 +7,4 @@ duckduckgo-search
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
- html2text
11
- transformers
12
- pillow
13
- torch
14
- accelerate
 
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
+ html2text