Lasdw commited on
Commit
942e3f2
·
1 Parent(s): 4a2c0eb

updated system prompt to allow no tool to be called

Browse files
Files changed (3) hide show
  1. .gitignore +12 -1
  2. agent.py +88 -97
  3. tools.py +625 -0
.gitignore CHANGED
@@ -1,3 +1,14 @@
1
  .env
2
  .env.*
3
- image.png
 
 
 
 
 
 
 
 
 
 
 
 
1
  .env
2
  .env.*
3
+ image.png
4
+
5
+ GAIA
6
+ GAIA/*
7
+
8
+ pycache/*
9
+ __pycache__/*
10
+
11
+ *.pyc
12
+ *.pyo
13
+ *.pyd
14
+
agent.py CHANGED
@@ -4,29 +4,19 @@ from typing import TypedDict, Annotated, Dict, Any, Optional, Union, List
4
  from pathlib import Path
5
  from langgraph.graph.message import add_messages
6
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
7
- from langgraph.prebuilt import ToolNode
8
- from langchain.tools import Tool
9
  from langgraph.graph import START, END, StateGraph
10
- from langgraph.prebuilt import tools_condition
11
  from langchain_openai import ChatOpenAI
12
- from langchain_community.tools import DuckDuckGoSearchRun
13
- import getpass
14
- import subprocess
15
  import tempfile
16
- import time
17
  import random
18
  import json
19
- import re
20
  import requests
21
  from urllib.parse import quote, urlparse
22
- import sys
23
  from bs4 import BeautifulSoup
24
  import html2text
25
  import pandas as pd
26
  from tabulate import tabulate
27
  import base64
28
 
29
- from apify_client import ApifyClient
30
  from langchain_community.document_loaders import WikipediaLoader
31
  from langchain_community.document_loaders import ArxivLoader
32
  from langchain_community.tools.tavily_search import TavilySearchResults
@@ -764,6 +754,8 @@ excel_to_text: Convert Excel to Markdown table with attachment, args: {"excel_pa
764
 
765
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
766
 
 
 
767
  Example use for tools:
768
 
769
  ```json
@@ -971,15 +963,9 @@ def assistant(state: AgentState) -> Dict[str, Any]:
971
  # Combine system message with the (potentially pruned) core messages
972
  messages_for_llm = [system_msg] + llm_input_core_messages
973
 
974
- # Log the messages being sent to LLM for debugging
975
- # print(f"Messages for LLM (count: {len(messages_for_llm)}):")
976
- # for i, msg in enumerate(messages_for_llm):
977
- # print(f" {i}: Type={type(msg).__name__}, Content='{str(msg.content)[:100].replace('\\n', ' ')}...'")
978
-
979
  # Get response from the assistant
980
  response = chat_with_tools.invoke(messages_for_llm, stop=["Observation:"])
981
  print(f"Assistant response type: {type(response)}")
982
- # print(f"Response content (first 300 chars): {response.content[:300].replace('\n', ' ')}...")
983
  content_preview = response.content[:300].replace('\n', ' ')
984
  print(f"Response content (first 300 chars): {content_preview}...")
985
 
@@ -997,113 +983,118 @@ def assistant(state: AgentState) -> Dict[str, Any]:
997
  if action_json and "action" in action_json and "action_input" in action_json:
998
  tool_name = action_json["action"]
999
  tool_input = action_json["action_input"]
1000
- print(f"Extracted tool: {tool_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001
  print(f"Tool input: {tool_input}")
1002
 
1003
  tool_call_id = f"call_{random.randint(1000000, 9999999)}"
1004
 
1005
  state_update["current_tool"] = tool_name
1006
  state_update["action_input"] = tool_input
1007
- # state_update["tool_call_id"] = tool_call_id # If needed by your graph
1008
  else:
1009
  print("No tool action found or 'Final Answer' detected in response.")
1010
  state_update["current_tool"] = None
1011
  state_update["action_input"] = None
1012
-
1013
  return state_update
1014
 
1015
  def extract_json_from_text(text: str) -> dict:
1016
  """Extract JSON from text, handling markdown code blocks."""
1017
  try:
1018
- import re # Import re at the beginning of the function
1019
 
1020
  print(f"Attempting to extract JSON from text: {text[:200]}...")
1021
 
1022
- # Look for "Action:" followed by a markdown code block - common LLM output pattern
1023
- # This handles cases where the LLM outputs something like:
1024
- # Action:
1025
- # ```python
1026
- # code here
1027
- # ```
1028
  action_match = re.search(r"Action:\s*```(?:python|json)?\s*(.*?)```", text, re.DOTALL)
1029
  if action_match:
1030
  action_content = action_match.group(1).strip()
1031
  print(f"Found action content from markdown block: {action_content[:100]}...")
1032
 
1033
- # If it looks like Python code, try to create a proper JSON structure
1034
- if "=" in action_content or "import" in action_content or "print" in action_content:
1035
- print("Detected Python code, formatting as action_input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1036
  return {
1037
- "action": "python_code",
1038
- "action_input": {"code": action_content}
1039
  }
 
 
1040
 
1041
- # Look for markdown code blocks - the most common pattern
1042
- if "```" in text:
1043
- print("Found markdown code block")
1044
- # Find all code blocks
1045
- blocks = []
1046
- lines = text.split('\n')
1047
- i = 0
1048
- while i < len(lines):
1049
- line = lines[i]
1050
- if "```" in line:
1051
- # Start of code block
1052
- start_idx = i + 1
1053
- i += 1
1054
- # Find the end of the code block
1055
- while i < len(lines) and "```" not in lines[i]:
1056
- i += 1
1057
- if i < len(lines):
1058
- # Found the end
1059
- block_content = '\n'.join(lines[start_idx:i])
1060
- blocks.append(block_content)
1061
- i += 1
1062
-
1063
- # Try to parse each block as JSON
1064
- for block in blocks:
1065
- block = block.strip()
1066
- print(f"Trying to parse block: {block[:100]}...")
1067
- try:
1068
- # Clean the block - sometimes there might be a language identifier
1069
- if block.startswith("json"):
1070
- block = block[4:].strip()
1071
-
1072
- # Validate JSON before parsing
1073
- parsed = json.loads(block)
1074
- print(f"Successfully parsed JSON: {parsed}")
1075
- return parsed
1076
- except json.JSONDecodeError as e:
1077
- print(f"JSON parse error: {e}")
1078
- continue
1079
-
1080
- # Look for JSON-like patterns in the text using a more precise regex
1081
- # Match balanced braces
1082
- # No need to import re again here
1083
-
1084
- # Try to find JSON objects with proper brace matching
1085
- brace_count = 0
1086
- start_pos = -1
1087
-
1088
- for i, char in enumerate(text):
1089
- if char == '{':
1090
- if brace_count == 0:
1091
- start_pos = i
1092
- brace_count += 1
1093
- elif char == '}':
1094
- brace_count -= 1
1095
- if brace_count == 0 and start_pos >= 0:
1096
- # Found a complete JSON object
1097
- json_candidate = text[start_pos:i+1]
1098
- try:
1099
- parsed = json.loads(json_candidate)
1100
- print(f"Found valid JSON: {parsed}")
1101
- return parsed
1102
- except json.JSONDecodeError:
1103
- continue
1104
-
1105
- # If we're here, we couldn't find a valid JSON object
1106
- print("Could not extract valid JSON from text")
1107
  return None
1108
 
1109
  except Exception as e:
 
4
  from pathlib import Path
5
  from langgraph.graph.message import add_messages
6
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
 
 
7
  from langgraph.graph import START, END, StateGraph
 
8
  from langchain_openai import ChatOpenAI
 
 
 
9
  import tempfile
 
10
  import random
11
  import json
 
12
  import requests
13
  from urllib.parse import quote, urlparse
 
14
  from bs4 import BeautifulSoup
15
  import html2text
16
  import pandas as pd
17
  from tabulate import tabulate
18
  import base64
19
 
 
20
  from langchain_community.document_loaders import WikipediaLoader
21
  from langchain_community.document_loaders import ArxivLoader
22
  from langchain_community.tools.tavily_search import TavilySearchResults
 
754
 
755
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
756
 
757
+ If you do not want to use any tool AND have not yet arrived at a solution, call the python_code tool with an empty string as the code.
758
+
759
  Example use for tools:
760
 
761
  ```json
 
963
  # Combine system message with the (potentially pruned) core messages
964
  messages_for_llm = [system_msg] + llm_input_core_messages
965
 
 
 
 
 
 
966
  # Get response from the assistant
967
  response = chat_with_tools.invoke(messages_for_llm, stop=["Observation:"])
968
  print(f"Assistant response type: {type(response)}")
 
969
  content_preview = response.content[:300].replace('\n', ' ')
970
  print(f"Response content (first 300 chars): {content_preview}...")
971
 
 
983
  if action_json and "action" in action_json and "action_input" in action_json:
984
  tool_name = action_json["action"]
985
  tool_input = action_json["action_input"]
986
+
987
+ # Handle nested JSON issue - if action_input is a string containing JSON
988
+ if tool_name == "python_code" and isinstance(tool_input, dict) and "code" in tool_input:
989
+ code = tool_input["code"]
990
+ if code.startswith("{") and ("action" in code or "action_input" in code):
991
+ try:
992
+ # Try to see if this is a nested JSON structure
993
+ nested_json = json.loads(code)
994
+ if isinstance(nested_json, dict) and "action" in nested_json and "action_input" in nested_json:
995
+ # Replace with the nested structure
996
+ tool_name = nested_json["action"]
997
+ tool_input = nested_json["action_input"]
998
+ print(f"Unwrapped nested JSON. New tool: {tool_name}")
999
+ print(f"New tool input: {tool_input}")
1000
+ except:
1001
+ # If it fails, keep original values
1002
+ pass
1003
+
1004
+ print(f"Using tool: {tool_name}")
1005
  print(f"Tool input: {tool_input}")
1006
 
1007
  tool_call_id = f"call_{random.randint(1000000, 9999999)}"
1008
 
1009
  state_update["current_tool"] = tool_name
1010
  state_update["action_input"] = tool_input
 
1011
  else:
1012
  print("No tool action found or 'Final Answer' detected in response.")
1013
  state_update["current_tool"] = None
1014
  state_update["action_input"] = None
1015
+
1016
  return state_update
1017
 
1018
  def extract_json_from_text(text: str) -> dict:
1019
  """Extract JSON from text, handling markdown code blocks."""
1020
  try:
1021
+ import re
1022
 
1023
  print(f"Attempting to extract JSON from text: {text[:200]}...")
1024
 
1025
+ # First, clean up the text to handle specific patterns that might confuse parsing
1026
+ text = text.replace('\\n', '\n').replace('\\"', '"')
1027
+
1028
+ # Pattern 1: Look for "Action:" followed by a markdown code block
 
 
1029
  action_match = re.search(r"Action:\s*```(?:python|json)?\s*(.*?)```", text, re.DOTALL)
1030
  if action_match:
1031
  action_content = action_match.group(1).strip()
1032
  print(f"Found action content from markdown block: {action_content[:100]}...")
1033
 
1034
+ # Try to parse as JSON first
1035
+ try:
1036
+ parsed_json = json.loads(action_content)
1037
+ if "action" in parsed_json and "action_input" in parsed_json:
1038
+ return parsed_json
1039
+ except json.JSONDecodeError:
1040
+ # If it's Python code, create action structure
1041
+ if "=" in action_content or "import" in action_content or "print" in action_content:
1042
+ print("Detected Python code, formatting as action_input")
1043
+ return {
1044
+ "action": "python_code",
1045
+ "action_input": {"code": action_content}
1046
+ }
1047
+
1048
+ # Pattern 2: Look for regular markdown code blocks
1049
+ code_blocks = re.findall(r"```(?:json|python)?(.+?)```", text, re.DOTALL)
1050
+ for block in code_blocks:
1051
+ block = block.strip()
1052
+ print(f"Processing code block: {block[:100]}...")
1053
+
1054
+ # Try to parse as JSON
1055
+ try:
1056
+ parsed = json.loads(block)
1057
+ if "action" in parsed and "action_input" in parsed:
1058
+ print(f"Successfully parsed JSON block: {parsed}")
1059
+ return parsed
1060
+ except json.JSONDecodeError:
1061
+ # If it's Python code, create action structure
1062
+ if "=" in block or "import" in block or "print" in block or "def " in block:
1063
+ print("Detected Python code in block, formatting as action_input")
1064
+ return {
1065
+ "action": "python_code",
1066
+ "action_input": {"code": block}
1067
+ }
1068
+
1069
+ # Pattern 3: Direct JSON object ({...}) in the text
1070
+ json_matches = re.findall(r"\{[\s\S]*?\}", text)
1071
+ for json_str in json_matches:
1072
+ try:
1073
+ parsed = json.loads(json_str)
1074
+ if "action" in parsed and "action_input" in parsed:
1075
+ print(f"Found valid JSON object: {parsed}")
1076
+ return parsed
1077
+ except json.JSONDecodeError:
1078
+ continue
1079
+
1080
+ # Pattern 4: Look for patterns like 'action': 'tool_name', 'action_input': {...}
1081
+ action_pattern = re.search(r"['\"](action)['\"]:\s*['\"](\w+)['\"]", text)
1082
+ action_input_pattern = re.search(r"['\"](action_input)['\"]:\s*(\{.+\})", text, re.DOTALL)
1083
+
1084
+ if action_pattern and action_input_pattern:
1085
+ action = action_pattern.group(2)
1086
+ action_input_str = action_input_pattern.group(2)
1087
+
1088
+ try:
1089
+ action_input = json.loads(action_input_str)
1090
  return {
1091
+ "action": action,
1092
+ "action_input": action_input
1093
  }
1094
+ except json.JSONDecodeError:
1095
+ pass
1096
 
1097
+ print("Could not extract valid JSON from text using any pattern")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  return None
1099
 
1100
  except Exception as e:
tools.py ADDED
@@ -0,0 +1,625 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from typing import Dict, Any, Optional, Union, List
4
+ from pathlib import Path
5
+ import tempfile
6
+ import base64
7
+ import json
8
+ import requests
9
+ from urllib.parse import urlparse
10
+ from bs4 import BeautifulSoup
11
+ import html2text
12
+ import pandas as pd
13
+ from tabulate import tabulate
14
+ from langchain_community.document_loaders import WikipediaLoader
15
+ from langchain_community.document_loaders import ArxivLoader
16
+ from langchain_community.tools.tavily_search import TavilySearchResults
17
+ from supabase import create_client, Client
18
+
19
+ load_dotenv()
20
+
21
def run_python_code(code: str):
    """Execute a snippet of Python code in a restricted in-process sandbox.

    The snippet is first screened against a blacklist of dangerous calls and
    an allow-list of import statements, then executed with a reduced builtins
    table and a whitelist of safe stdlib modules (plus numpy/pandas when
    installed).

    Args:
        code: Python source code to execute.

    Returns:
        Captured stdout as a string, "Result: <value>" for a bare expression,
        a success message when there is no output, or an "Error ..." message.

    NOTE(review): substring blacklisting of exec()'d code is best-effort only
    and is not a real security boundary; it can be bypassed.
    """
    # Substrings that immediately reject the snippet.
    dangerous_operations = [
        "os.system", "os.popen", "os.unlink", "os.remove",
        "subprocess.run", "subprocess.call", "subprocess.Popen",
        "shutil.rmtree", "shutil.move", "shutil.copy",
        "open(", "file(", "eval(", "exec(",
        "__import__", "input(", "raw_input(",
        "__builtins__", "globals(", "locals(",
        "compile(", "execfile(", "reload("
    ]

    # Import statements the snippet is allowed to contain.
    safe_imports = {
        "import datetime", "import math", "import random",
        "import statistics", "import collections", "import itertools",
        "import re", "import json", "import csv", "import numpy",
        "import pandas", "from math import", "from datetime import",
        "from statistics import", "from collections import",
        "from itertools import"
    }

    for dangerous_op in dangerous_operations:
        if dangerous_op in code:
            return f"Error: Code contains potentially unsafe operations: {dangerous_op}"

    # Reject any import line that is not on the allow-list.
    for line in code.splitlines():
        line = line.strip()
        if line.startswith("import ") or line.startswith("from "):
            is_safe = any(line.startswith(safe_import) for safe_import in safe_imports)
            # Also allow basic numpy/pandas imports.
            is_safe = is_safe or line.startswith("import numpy") or line.startswith("import pandas")
            if not is_safe:
                return f"Error: Code contains potentially unsafe import: {line}"

    try:
        # Capture stdout to collect print output.
        import io
        from contextlib import redirect_stdout

        # Minimal builtins table exposed to the snippet.
        restricted_globals = {
            '__builtins__': {
                'abs': abs, 'all': all, 'any': any, 'bin': bin, 'bool': bool,
                'chr': chr, 'dict': dict, 'dir': dir, 'divmod': divmod,
                'enumerate': enumerate, 'filter': filter, 'float': float,
                'format': format, 'hex': hex, 'int': int, 'len': len,
                'list': list, 'map': map, 'max': max, 'min': min, 'oct': oct,
                'ord': ord, 'pow': pow, 'print': print, 'range': range,
                'reversed': reversed, 'round': round, 'set': set, 'slice': slice,
                'sorted': sorted, 'str': str, 'sum': sum, 'tuple': tuple,
                'type': type, 'zip': zip,
            }
        }

        # Pre-import the whitelisted stdlib modules for the snippet.
        import math
        import datetime
        import random
        import statistics
        import collections
        import itertools
        import re
        import json
        import csv

        restricted_globals['math'] = math
        restricted_globals['datetime'] = datetime
        restricted_globals['random'] = random
        restricted_globals['statistics'] = statistics
        restricted_globals['collections'] = collections
        restricted_globals['itertools'] = itertools
        restricted_globals['re'] = re
        restricted_globals['json'] = json
        restricted_globals['csv'] = csv

        # numpy and pandas are optional extras.
        try:
            import numpy as np
            restricted_globals['numpy'] = np
            restricted_globals['np'] = np
        except ImportError:
            pass

        try:
            import pandas as pd
            restricted_globals['pandas'] = pd
            restricted_globals['pd'] = pd
        except ImportError:
            pass

        local_scope = {}
        captured_output = io.StringIO()

        with redirect_stdout(captured_output):
            lines = code.strip().split('\n')
            # A single line with no statement keywords is treated as an expression.
            if len(lines) == 1 and not any(keyword in code for keyword in ['=', 'import', 'from', 'def', 'class', 'if', 'for', 'while', 'try', 'with']):
                try:
                    result = eval(code, restricted_globals, local_scope)
                    # Bug fix: only report a result when the expression produced
                    # one; previously a bare print(...) call also emitted a
                    # spurious "Result: None" line.
                    if result is not None:
                        print(f"Result: {result}")
                except Exception:  # bug fix: was a bare except
                    # Not a valid expression after all -- run it as a statement.
                    exec(code, restricted_globals, local_scope)
            else:
                # Multi-line code: execute the whole block.
                exec(code, restricted_globals, local_scope)

        output = captured_output.getvalue()

        if output.strip():
            return output.strip()

        # No stdout: try to evaluate the last line as an expression result.
        lines = code.strip().split('\n')
        last_line = lines[-1].strip() if lines else ""
        if last_line and not any(keyword in last_line for keyword in ['=', 'import', 'from', 'def', 'class', 'if', 'for', 'while', 'try', 'with', 'print']):
            try:
                result = eval(last_line, restricted_globals, local_scope)
                return f"Result: {result}"
            except Exception:  # bug fix: was a bare except
                pass

        return "Code executed successfully with no output."

    except SyntaxError as e:
        return f"Syntax Error: {str(e)}"
    except NameError as e:
        return f"Name Error: {str(e)}"
    except ZeroDivisionError as e:
        return f"Zero Division Error: {str(e)}"
    except Exception as e:
        return f"Error executing code: {str(e)}"
166
+
167
def scrape_webpage(url: str) -> str:
    """
    Fetch a web page and return its main content as readable text.

    Args:
        url: The URL to scrape.

    Returns:
        The page title, URL and extracted text content, or an error message.
    """
    try:
        # Validate the URL shape before touching the network.
        parsed = urlparse(url)
        if not parsed.scheme or not parsed.netloc:
            return f"Error: Invalid URL format: {url}. Please provide a valid URL with http:// or https:// prefix."

        # Refuse to touch loopback / private-network hosts.
        blocked_domains = [
            "localhost", "127.0.0.1", "0.0.0.0",
            "192.168.", "10.0.", "172.16.", "172.17.", "172.18.", "172.19.", "172.20.",
            "172.21.", "172.22.", "172.23.", "172.24.", "172.25.", "172.26.", "172.27.",
            "172.28.", "172.29.", "172.30.", "172.31."
        ]
        if any(blocked in parsed.netloc for blocked in blocked_domains):
            return f"Error: Access to internal/local URLs is blocked for security: {url}"

        print(f"Scraping URL: {url}")

        # Present a browser-like identity so fewer sites refuse the request.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'max-age=0',
        }

        # Bounded timeout so the call cannot hang indefinitely.
        response = requests.get(url, headers=request_headers, timeout=10)
        if response.status_code != 200:
            return f"Error: Failed to fetch the webpage. Status code: {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')

        # Strip elements that carry no article content.
        for junk in soup(["script", "style", "iframe", "footer", "nav"]):
            junk.decompose()

        title = soup.title.string if soup.title else "No title found"

        # Prefer a dedicated content container; fall back to the whole body.
        main_content = (
            soup.find('main')
            or soup.find('article')
            or soup.find(id='content')
            or soup.find(class_='content')
            or soup.body
        )

        converter = html2text.HTML2Text()
        converter.ignore_links = False
        converter.ignore_images = True
        converter.ignore_tables = False
        converter.unicode_snob = True

        source_html = str(main_content) if main_content else response.text
        text_content = converter.handle(source_html)

        # Length guard (limit is currently effectively unbounded).
        max_content_length = 99999999999
        if len(text_content) > max_content_length:
            text_content = text_content[:max_content_length] + "\n\n[Content truncated due to length...]"

        return f"Title: {title}\nURL: {url}\n\n{text_content}"

    except requests.exceptions.Timeout:
        return f"Error: Request timed out while trying to access {url}"
    except requests.exceptions.ConnectionError:
        return f"Error: Failed to connect to {url}. The site might be down or the URL might be incorrect."
    except requests.exceptions.RequestException as e:
        return f"Error requesting {url}: {str(e)}"
    except Exception as e:
        return f"Error scraping webpage {url}: {str(e)}"
265
+
266
def wikipedia_search(query: str, num_results: int = 3) -> str:
    """
    Search Wikipedia for information about a specific query.

    Args:
        query: Search query.
        num_results: Number of search results to return (default: 3).

    Returns:
        Formatted Wikipedia search results, or an error message.
    """
    try:
        # Validate input
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Coerce num_results, falling back to the default of 3 when it is
        # non-numeric or non-positive.
        try:
            num_results = int(num_results)
            if num_results <= 0:
                num_results = 3
        except (TypeError, ValueError):  # bug fix: was a bare except
            num_results = 3

        print(f"Searching Wikipedia for: {query}")

        # Use WikipediaLoader from LangChain
        loader = WikipediaLoader(query=query, load_max_docs=num_results)
        docs = loader.load()

        if not docs:
            return f"No Wikipedia results found for '{query}'. Try refining your search."

        # Format the results
        formatted_results = f"Wikipedia search results for '{query}':\n\n"

        for i, doc in enumerate(docs, 1):
            title = doc.metadata.get('title', 'Unknown Title')
            source = doc.metadata.get('source', 'No URL')
            content = doc.page_content

            # Keep each entry short so the model is not flooded.
            if len(content) > 500:
                content = content[:500] + "..."

            formatted_results += f"{i}. {title}\n"
            formatted_results += f" URL: {source}\n"
            formatted_results += f" {content}\n\n"

        return formatted_results

    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"
319
+
320
def tavily_search(query: str, search_depth: str = "basic") -> str:
    """
    Search the web using the Tavily Search API.

    Args:
        query: Search query.
        search_depth: Depth of search ('basic' or 'advanced'; the legacy
            value 'comprehensive' is mapped to 'advanced').

    Returns:
        Formatted search results from Tavily, or an error message.
    """
    try:
        # Check for API key
        tavily_api_key = os.environ.get("TAVILY_API_KEY")
        if not tavily_api_key:
            return "Error: Tavily API key not found. Please set the TAVILY_API_KEY environment variable."

        # Validate input
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Bug fix: the Tavily API accepts 'basic' or 'advanced'; the old
        # 'comprehensive' value is not valid upstream and the valid
        # 'advanced' was being downgraded to 'basic'. Map 'comprehensive'
        # to 'advanced' for backward compatibility; anything else falls
        # back to 'basic'.
        if search_depth == "comprehensive":
            search_depth = "advanced"
        elif search_depth not in ("basic", "advanced"):
            search_depth = "basic"

        print(f"Searching Tavily for: {query} (depth: {search_depth})")

        # Initialize the Tavily search tool
        search = TavilySearchResults(api_key=tavily_api_key)

        # Execute the search
        results = search.invoke({"query": query, "search_depth": search_depth})

        if not results:
            return f"No Tavily search results found for '{query}'. Try refining your search."

        # Format the results
        formatted_results = f"Tavily search results for '{query}':\n\n"

        for i, result in enumerate(results, 1):
            formatted_results += f"{i}. {result.get('title', 'No title')}\n"
            formatted_results += f" URL: {result.get('url', 'No URL')}\n"
            formatted_results += f" {result.get('content', 'No content')}\n\n"

        return formatted_results

    except Exception as e:
        return f"Error searching with Tavily: {str(e)}"
368
+
369
def arxiv_search(query: str, max_results: int = 5) -> str:
    """
    Search ArXiv for scientific papers matching the query.

    Args:
        query: Search query for ArXiv.
        max_results: Maximum number of results to return (1-10, default: 5).

    Returns:
        Formatted ArXiv search results, or an error message.
    """
    try:
        # Validate input
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Coerce max_results, falling back to the default of 5 when it is
        # non-numeric, non-positive, or too large.
        try:
            max_results = int(max_results)
            if max_results <= 0 or max_results > 10:
                max_results = 5
        except (TypeError, ValueError):  # bug fix: was a bare except
            max_results = 5

        print(f"Searching ArXiv for: {query}")

        # Use ArxivLoader from LangChain
        loader = ArxivLoader(
            query=query,
            load_max_docs=max_results,
            load_all_available_meta=True
        )

        docs = loader.load()

        if not docs:
            return f"No ArXiv papers found for '{query}'. Try refining your search."

        # Format the results
        formatted_results = f"ArXiv papers for '{query}':\n\n"

        for i, doc in enumerate(docs, 1):
            meta = doc.metadata
            title = meta.get('Title', 'Unknown Title')
            url = meta.get('Entry ID', 'No URL')
            authors = meta.get('Authors', 'Unknown Authors')
            published = meta.get('Published', 'Unknown Date')

            formatted_results += f"{i}. {title}\n"
            formatted_results += f" URL: {url}\n"
            formatted_results += f" Authors: {authors}\n"
            formatted_results += f" Published: {published}\n"

            # Add abstract, truncated if too long.
            abstract = doc.page_content.replace('\n', ' ')
            if len(abstract) > 300:
                abstract = abstract[:300] + "..."
            formatted_results += f" Abstract: {abstract}\n\n"

        return formatted_results

    except Exception as e:
        return f"Error searching ArXiv: {str(e)}"
432
+
433
def supabase_operation(operation_type: str, table: str, data: dict = None, filters: dict = None) -> str:
    """
    Perform operations on a Supabase database.

    Args:
        operation_type: One of 'insert', 'select', 'update', 'delete'.
        table: Name of the table to operate on.
        data: Row data for insert/update operations.
        filters: Equality filters (e.g. {"id": 1}) for select/update/delete.

    Returns:
        A human-readable result string, or an error message.
    """
    try:
        # Credentials come from the environment.
        supabase_url = os.environ.get("SUPABASE_URL")
        supabase_key = os.environ.get("SUPABASE_ANON_KEY")
        if not supabase_url or not supabase_key:
            return "Error: Supabase credentials not found. Please set SUPABASE_URL and SUPABASE_ANON_KEY environment variables."

        client = create_client(supabase_url, supabase_key)

        # Validate inputs.
        if not table:
            return "Error: Table name is required."
        if operation_type not in ('insert', 'select', 'update', 'delete'):
            return "Error: Invalid operation type. Use 'insert', 'select', 'update', or 'delete'."

        def apply_filters(query):
            # Chain an equality condition for every filter entry.
            for column, value in (filters or {}).items():
                query = query.eq(column, value)
            return query

        if operation_type == 'insert':
            if not data:
                return "Error: Data is required for insert operation."
            result = client.table(table).insert(data).execute()
            return f"Insert successful: {len(result.data)} row(s) inserted into {table}"

        if operation_type == 'select':
            result = apply_filters(client.table(table).select("*")).execute()
            return f"Select successful: Found {len(result.data)} row(s) in {table}\nData: {json.dumps(result.data, indent=2)}"

        if operation_type == 'update':
            if not data or not filters:
                return "Error: Both data and filters are required for update operation."
            result = apply_filters(client.table(table).update(data)).execute()
            return f"Update successful: {len(result.data)} row(s) updated in {table}"

        # Remaining case: delete.
        if not filters:
            return "Error: Filters are required for delete operation."
        apply_filters(client.table(table).delete()).execute()
        return f"Delete successful: Rows deleted from {table}"

    except Exception as e:
        return f"Error performing Supabase operation: {str(e)}"
511
+
512
def excel_to_text(excel_path: str, sheet_name: Optional[str] = None, file_content: Optional[bytes] = None) -> str:
    """
    Read an Excel file and return a Markdown table of the requested sheet.

    Args:
        excel_path: Path to the Excel file (.xlsx or .xls), or a display name
            when the workbook is supplied via ``file_content``.
        sheet_name: Optional sheet name or (stringified) sheet index. When
            None, the first sheet is read.
        file_content: Optional raw bytes of the workbook when provided as an
            attachment instead of a path on disk.

    Returns:
        A Markdown table representing the Excel sheet, or a human-readable
        error message if the file is not found or cannot be read.
    """
    temp_path: Optional[str] = None
    try:
        if file_content:
            # Attachment case: persist the bytes so pandas can open a real file.
            with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as temp_file:
                temp_file.write(file_content)
                temp_path = temp_file.name

            print(f"Saved attached Excel file to temporary location: {temp_path}")
            file_path = Path(temp_path)
        else:
            # Regular file path case.
            file_path = Path(excel_path).expanduser().resolve()
            if not file_path.is_file():
                return f"Error: Excel file not found at {file_path}"

        # Accept either a sheet name or a numeric index; tool arguments parsed
        # from JSON may deliver the index as an int, so normalize via str().
        sheet: Union[str, int] = (
            int(sheet_name)
            if sheet_name and str(sheet_name).isdigit()
            else sheet_name or 0
        )

        df = pd.read_excel(file_path, sheet_name=sheet)

        if hasattr(df, "to_markdown"):
            return df.to_markdown(index=False)

        return tabulate(df, headers="keys", tablefmt="github", showindex=False)

    except Exception as e:
        return f"Error reading Excel file: {e}"
    finally:
        # Single cleanup path for the temporary attachment file, reached on
        # both success and failure (replaces the duplicated unlink logic).
        if temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
            print(f"Deleted temporary Excel file: {temp_path}")
565
+
566
def save_attachment_to_tempfile(file_content_b64: str, file_extension: str = '.xlsx') -> Optional[str]:
    """
    Decode a base64 file content and save it to a temporary file.

    Args:
        file_content_b64: Base64 encoded file content.
        file_extension: Suffix for the temporary file (kept so downstream
            tools that sniff type by extension still work).

    Returns:
        Path to the saved temporary file, or None if decoding or writing
        failed. (Annotation fixed: the error path returns None, so the
        return type is Optional[str], not str.)
    """
    try:
        file_content = base64.b64decode(file_content_b64)

        # delete=False: the caller is responsible for removing the file once
        # the downstream tool has consumed it.
        with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
            temp_file.write(file_content)
            temp_path = temp_file.name

        print(f"Saved attachment to temporary file: {temp_path}")
        return temp_path

    except Exception as e:
        # Best-effort helper: report the problem and signal failure via None
        # rather than propagating, matching how callers check the result.
        print(f"Error saving attachment: {e}")
        return None
592
+
593
# Registry of the tools exposed to the agent. Each entry pairs a tool name
# with the usage description shown to the model and the callable that
# implements it.
tools_config = [
    {"name": tool_name, "description": tool_description, "func": tool_func}
    for tool_name, tool_description, tool_func in (
        (
            "python_code",
            "Execute Python code. Provide the complete Python code as a string in the format: {\"code\": \"your python code here\"}",
            run_python_code,
        ),
        (
            "wikipedia_search",
            "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
            wikipedia_search,
        ),
        (
            "tavily_search",
            "Search the web using Tavily for more comprehensive results. Provide a query in the format: {\"query\": \"your search query\", \"search_depth\": \"basic\"}",
            tavily_search,
        ),
        (
            "arxiv_search",
            "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
            arxiv_search,
        ),
        (
            "supabase_operation",
            "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
            supabase_operation,
        ),
        (
            "excel_to_text",
            "Read an Excel file and return a Markdown table. You can provide either the path to an Excel file or use a file attachment. For attachments, provide a base64-encoded string of the file content and a filename.",
            excel_to_text,
        ),
    )
]