Lasdw committed on
Commit
85c9819
·
1 Parent(s): 9203ad7

Remove the use of Apify search

Browse files
Files changed (2) hide show
  1. agent.py +376 -245
  2. requirements.txt +2 -1
agent.py CHANGED
@@ -26,6 +26,7 @@ from apify_client import ApifyClient
26
  from langchain_community.document_loaders import WikipediaLoader
27
  from langchain_community.document_loaders import ArxivLoader
28
  from langchain_community.tools.tavily_search import TavilySearchResults # For Tavily search
 
29
 
30
  load_dotenv()
31
 
@@ -116,60 +117,60 @@ def run_python_code(code: str):
116
  return f"Error executing code: {str(e)}"
117
 
118
  # Apify-based search function
119
- def apify_google_search(query: str, limit: int = 10) -> str:
120
- """
121
- Use Apify's Google Search Results Scraper to get search results
122
-
123
- Args:
124
- query: The search query string
125
- limit: Number of results to return (10, 20, 30, 40, 50, 100)
126
-
127
- Returns:
128
- Formatted search results as a string
129
- """
130
- # You would need to provide a valid Apify API token
131
- # You can get one by signing up at https://apify.com/
132
- # Replace this with your actual Apify API token or set as environment variable
133
- APIFY_API_TOKEN = os.environ.get("APIFY_API_TOKEN", "")
134
-
135
- if not APIFY_API_TOKEN:
136
- print("No Apify API token found. Using fallback search method.")
137
- return fallback_search(query)
138
-
139
- try:
140
- # Initialize the ApifyClient with API token
141
- client = ApifyClient(APIFY_API_TOKEN)
142
-
143
- # Prepare the Actor input - convert limit to string as required by the API
144
- run_input = {
145
- "keyword": query,
146
- "limit": str(limit), # Convert to string as required by the API
147
- "country": "US"
148
- }
149
-
150
- # The Actor ID for the Google Search Results Scraper
151
- ACTOR_ID = "563JCPLOqM1kMmbbP"
152
-
153
- print(f"Starting Apify search for: '{query}'")
154
-
155
- # Run the Actor and wait for it to finish (with timeout)
156
- run = client.actor(ACTOR_ID).call(run_input=run_input, timeout_secs=60)
157
-
158
- if not run or not run.get("defaultDatasetId"):
159
- print("Failed to get results from Apify actor")
160
- return fallback_search(query)
161
-
162
- # Fetch Actor results from the run's dataset
163
- results = []
164
- for item in client.dataset(run["defaultDatasetId"]).iterate_items():
165
- results.append(item)
166
-
167
- # Format and return the results
168
- return format_search_results(results, query)
169
-
170
- except Exception as e:
171
- print(f"Error using Apify: {str(e)}")
172
- return fallback_search(query)
173
 
174
  def scrape_webpage(url: str) -> str:
175
  """
@@ -270,103 +271,105 @@ def scrape_webpage(url: str) -> str:
270
  except Exception as e:
271
  return f"Error scraping webpage {url}: {str(e)}"
272
 
273
- def format_search_results(results: List[Dict], query: str) -> str:
274
- """Format the search results into a readable string"""
275
- if not results or len(results) == 0:
276
- return f"No results found for query: {query}"
277
-
278
- print(f"Raw search results: {str(results)[:1000]}...")
279
-
280
- # Extract search results from the Apify output
281
- formatted_results = f"Search results for '{query}':\n\n"
282
-
283
- # Check if results is a list of dictionaries or a dictionary with nested results
284
- if isinstance(results, dict) and "results" in results:
285
- items = results["results"]
286
- elif isinstance(results, list):
287
- items = results
288
- else:
289
- return f"Unable to process results for query: {query}"
290
-
291
- # Handle different Apify result formats
292
- if len(items) > 0:
293
- # Check the structure of the first item to determine format
294
- first_item = items[0]
295
-
296
- # If item has 'organicResults', this is the format from some Apify actors
297
- if isinstance(first_item, dict) and "organicResults" in first_item:
298
- organic_results = first_item.get("organicResults", [])
299
- for i, result in enumerate(organic_results[:10], 1):
300
- if "title" in result and "url" in result:
301
- formatted_results += f"{i}. {result['title']}\n"
302
- formatted_results += f" URL: {result['url']}\n"
303
- if "snippet" in result:
304
- formatted_results += f" {result['snippet']}\n"
305
- formatted_results += "\n"
306
- else:
307
- # Standard format with title/url/description
308
- for i, result in enumerate(items[:10], 1):
309
- if "title" in result and "url" in result:
310
- formatted_results += f"{i}. {result['title']}\n"
311
- formatted_results += f" URL: {result['url']}\n"
312
- if "description" in result:
313
- formatted_results += f" {result['description']}\n"
314
- elif "snippet" in result:
315
- formatted_results += f" {result['snippet']}\n"
316
- formatted_results += "\n"
317
-
318
- return formatted_results
 
319
 
320
- def fallback_search(query: str) -> str:
321
- """Fallback search method using DuckDuckGo when Apify is not available"""
322
- try:
323
- search_tool = DuckDuckGoSearchRun()
324
- result = search_tool.invoke(query)
325
- return "Observation: " + result
326
- except Exception as e:
327
- return f"Search error: {str(e)}. Please try a different query or method."
 
328
 
329
- # Custom search function with improved error handling
330
- def safe_web_search(query: str) -> str:
331
- """Search the web safely with error handling and retry logic."""
332
- if not query:
333
- return "Error: No search query provided. Please specify what you want to search for."
334
-
335
- # Try using Apify first, if it fails it will use the fallback
336
- return "Observation: " + apify_google_search(query)
337
-
338
- # The code below is kept for reference but won't be executed
339
- max_retries = 3
340
- backoff_factor = 1.5
341
-
342
- for attempt in range(max_retries):
343
- try:
344
- # Use the DuckDuckGoSearchRun tool
345
- search_tool = DuckDuckGoSearchRun()
346
- result = search_tool.invoke(query)
347
-
348
- # If we get an empty result, provide a helpful message
349
- if not result or len(result.strip()) < 10:
350
- return f"The search for '{query}' did not return any useful results. Please try a more specific query or a different search engine."
351
-
352
- return "Observation: " + result
353
-
354
- except Exception as e:
355
- # If we're being rate limited
356
- if "Ratelimit" in str(e) or "429" in str(e):
357
- if attempt < max_retries - 1:
358
- wait_time = backoff_factor ** attempt
359
- print(f"Rate limited, waiting {wait_time:.2f} seconds before retrying...")
360
- time.sleep(wait_time)
361
- else:
362
- # On last attempt, return a helpful error
363
- error_msg = f"I'm currently unable to search for '{query}' due to service rate limits. "
364
- return error_msg
365
- else:
366
- # For other types of errors
367
- return f"Error while searching for '{query}': {str(e)}"
368
-
369
- return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
370
 
371
  def wikipedia_search(query: str, num_results: int = 3) -> str:
372
  """
@@ -535,25 +538,97 @@ def arxiv_search(query: str, max_results: int = 5) -> str:
535
  except Exception as e:
536
  return f"Error searching ArXiv: {str(e)}"
537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  # System prompt to guide the model's behavior
 
 
 
 
539
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
540
 
541
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
542
  wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
543
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
544
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
545
- web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
546
- webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
547
-
548
- IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
549
- 1. First reason about the problem in the "Thought" section
550
- 2. Then decide what action to take in the "Action" section (using the tools)
551
- 3. Wait for an observation from the tool
552
- 4. Based on the observation, continue with another thought
553
- 5. This cycle repeats until you have enough information to provide a final answer
554
-
555
- NEVER fake or simulate tool output yourself. You can try to use the tools multiple times if needed and try using multiple tools if needed.
556
- Give preference to using Tavily Search and Wikipedia Search before using web_search or webpage_scrape. When Web_search does not return a result, use Tavily Search.
557
 
558
  The way you use the tools is by specifying a json blob.
559
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
@@ -563,44 +638,48 @@ python_code: Execute Python code, args: {"code": {"type": "string"}}
563
  wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
564
  tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
565
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
566
- web_search: Search the web for current information, args: {"query": {"type": "string"}}
567
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
 
568
 
569
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
570
 
571
- example use:
572
 
 
573
  ```json
574
  {
575
- "action": "web_search",
576
- "action_input": {"query": "population of New York City"}
577
  }
578
  ```
579
 
580
- Or for scraping a webpage:
581
-
582
  ```json
583
  {
584
- "action": "webpage_scrape",
585
- "action_input": {"url": "https://en.wikipedia.org/wiki/Artificial_intelligence"}
586
  }
587
  ```
588
 
589
- Or for searching Wikipedia:
 
 
 
 
 
 
590
 
 
591
  ```json
592
  {
593
- "action": "wikipedia_search",
594
- "action_input": {"query": "quantum physics", "num_results": 3}
595
  }
596
  ```
597
 
598
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
599
-
600
  Question: [the user's question]
601
-
602
  Thought: [your reasoning about what to do next]
603
-
604
  Action:
605
  ```json
606
  {
@@ -608,11 +687,8 @@ Action:
608
  "action_input": {"[parameter_name]": "[parameter_value]"}
609
  }
610
  ```
611
-
612
  Observation: [the result from the tool will appear here]
613
-
614
  Thought: [your reasoning after seeing the observation]
615
-
616
  Action:
617
  ```json
618
  {
@@ -620,9 +696,7 @@ Action:
620
  "action_input": {"[parameter_name]": "[parameter_value]"}
621
  }
622
  ```
623
-
624
  Observation: [another tool result will appear here]
625
-
626
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
627
  1. First reason about the problem in the "Thought" section
628
  2. Then decide what action to take in the "Action" section (using the tools)
@@ -633,12 +707,9 @@ IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observ
633
  NEVER fake or simulate tool output yourself.
634
 
635
  ... (this Thought/Action/Observation cycle can repeat as needed) ...
636
-
637
  Thought: I now know the final answer
638
-
639
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
640
  Make sure to follow any formatting instructions given by the user.
641
-
642
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
643
 
644
  # Generate the chat interface, including the tools
@@ -650,21 +721,21 @@ llm = ChatOpenAI(
650
  chat = llm
651
  # Tools are defined but not bound to the LLM here
652
  tools_config = [
653
- {
654
- "name": "web_search",
655
- "description": "Search the web for current information. Provide a specific search query in the format: {\"query\": \"your search query here\"}",
656
- "func": safe_web_search
657
- },
658
  {
659
  "name": "python_code",
660
  "description": "Execute Python code. Provide the complete Python code as a string in the format: {\"code\": \"your python code here\"}",
661
  "func": run_python_code
662
  },
663
- {
664
- "name": "webpage_scrape",
665
- "description": "Scrape content from a specific webpage URL. Provide a valid URL in the format: {\"url\": \"https://example.com\"}",
666
- "func": scrape_webpage
667
- },
668
  {
669
  "name": "wikipedia_search",
670
  "description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
@@ -679,6 +750,11 @@ tools_config = [
679
  "name": "arxiv_search",
680
  "description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
681
  "func": arxiv_search
 
 
 
 
 
682
  }
683
  ]
684
 
@@ -813,51 +889,52 @@ def extract_json_from_text(text: str) -> dict:
813
  print(f"Error extracting JSON: {e}")
814
  return None
815
 
816
- def web_search_node(state: AgentState) -> Dict[str, Any]:
817
- """Node that executes the web search tool."""
818
- print("Web Search Tool Called...\n\n")
819
-
820
- # Extract tool arguments
821
- action_input = state.get("action_input", {})
822
- print(f"Web search action_input: {action_input}")
823
-
824
- # Try different ways to extract the query
825
- query = ""
826
- if isinstance(action_input, dict):
827
- query = action_input.get("query", "")
828
- elif isinstance(action_input, str):
829
- query = action_input
830
-
831
- print(f"Searching for: '{query}'")
832
-
833
- # Call the search function with retry logic
834
- result = safe_web_search(query)
835
- print(f"Search result: {result}") # Print the full result for debugging
836
-
837
- # Check if we hit rate limits and add a helpful note
838
- if "rate limit" in result.lower() or "ratelimit" in result.lower():
839
- result += "\n\nNote: You can use your internal knowledge to provide a response since the search is rate limited."
840
-
841
- # Format the observation to continue the ReAct cycle
842
- # Don't include "Observation:" as the assistant is stopped at this token
843
- observation = result
844
-
845
- # Create a tool message with the result
846
- tool_message = AIMessage(
847
- content=f"Observation: {observation}"
848
- )
849
-
850
- # Print the observation that will be sent back to the assistant
851
- print("\n=== TOOL OBSERVATION ===")
852
- print(tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content)
853
- print("=== END OBSERVATION ===\n")
854
-
855
- # Return the updated state
856
- return {
857
- "messages": state["messages"] + [tool_message],
858
- "current_tool": None, # Reset the current tool
859
- "action_input": None # Clear the action input
860
- }
 
861
 
862
  def python_code_node(state: AgentState) -> Dict[str, Any]:
863
  """Node that executes Python code."""
@@ -1102,6 +1179,55 @@ def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
1102
  "action_input": None # Clear the action input
1103
  }
1104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1105
  # Router function to direct to the correct tool
1106
  def router(state: AgentState) -> str:
1107
  """Route to the appropriate tool based on the current_tool field."""
@@ -1110,9 +1236,9 @@ def router(state: AgentState) -> str:
1110
  print(f"Routing to: {tool}")
1111
  print(f"Router received action_input: {action_input}")
1112
 
1113
- if tool == "web_search":
1114
- return "web_search"
1115
- elif tool == "python_code":
1116
  return "python_code"
1117
  elif tool == "webpage_scrape":
1118
  return "webpage_scrape"
@@ -1122,6 +1248,8 @@ def router(state: AgentState) -> str:
1122
  return "tavily_search"
1123
  elif tool == "arxiv_search":
1124
  return "arxiv_search"
 
 
1125
  else:
1126
  return "end"
1127
 
@@ -1132,12 +1260,13 @@ def create_agent_graph() -> StateGraph:
1132
 
1133
  # Define nodes: these do the work
1134
  builder.add_node("assistant", assistant)
1135
- builder.add_node("web_search", web_search_node)
1136
  builder.add_node("python_code", python_code_node)
1137
  builder.add_node("webpage_scrape", webpage_scrape_node)
1138
  builder.add_node("wikipedia_search", wikipedia_search_node)
1139
  builder.add_node("tavily_search", tavily_search_node)
1140
  builder.add_node("arxiv_search", arxiv_search_node)
 
1141
 
1142
  # Define edges: these determine how the control flow moves
1143
  builder.add_edge(START, "assistant")
@@ -1162,23 +1291,25 @@ def create_agent_graph() -> StateGraph:
1162
  "debug",
1163
  router,
1164
  {
1165
- "web_search": "web_search",
1166
  "python_code": "python_code",
1167
  "webpage_scrape": "webpage_scrape",
1168
  "wikipedia_search": "wikipedia_search",
1169
  "tavily_search": "tavily_search",
1170
  "arxiv_search": "arxiv_search",
 
1171
  "end": END
1172
  }
1173
  )
1174
 
1175
  # Tools always go back to assistant
1176
- builder.add_edge("web_search", "assistant")
1177
  builder.add_edge("python_code", "assistant")
1178
  builder.add_edge("webpage_scrape", "assistant")
1179
  builder.add_edge("wikipedia_search", "assistant")
1180
  builder.add_edge("tavily_search", "assistant")
1181
  builder.add_edge("arxiv_search", "assistant")
 
1182
 
1183
  # Compile the graph
1184
  return builder.compile()
@@ -1234,7 +1365,7 @@ class TurboNerd:
1234
  # Example usage:
1235
  if __name__ == "__main__":
1236
  agent = TurboNerd(max_execution_time=60)
1237
- response = agent("What is the last sentence of albert einstein's wikipedia page?")
1238
  print("\nFinal Response:")
1239
  print(response)
1240
 
 
26
  from langchain_community.document_loaders import WikipediaLoader
27
  from langchain_community.document_loaders import ArxivLoader
28
  from langchain_community.tools.tavily_search import TavilySearchResults # For Tavily search
29
+ from supabase import create_client, Client
30
 
31
  load_dotenv()
32
 
 
117
  return f"Error executing code: {str(e)}"
118
 
119
  # Apify-based search function
120
+ # def apify_google_search(query: str, limit: int = 10) -> str:
121
+ # """
122
+ # Use Apify's Google Search Results Scraper to get search results
123
+ #
124
+ # Args:
125
+ # query: The search query string
126
+ # limit: Number of results to return (10, 20, 30, 40, 50, 100)
127
+ #
128
+ # Returns:
129
+ # Formatted search results as a string
130
+ # """
131
+ # # You would need to provide a valid Apify API token
132
+ # # You can get one by signing up at https://apify.com/
133
+ # # Replace this with your actual Apify API token or set as environment variable
134
+ # APIFY_API_TOKEN = os.environ.get("APIFY_API_TOKEN", "")
135
+ #
136
+ # if not APIFY_API_TOKEN:
137
+ # print("No Apify API token found. Using fallback search method.")
138
+ # return fallback_search(query)
139
+ #
140
+ # try:
141
+ # # Initialize the ApifyClient with API token
142
+ # client = ApifyClient(APIFY_API_TOKEN)
143
+ #
144
+ # # Prepare the Actor input - convert limit to string as required by the API
145
+ # run_input = {
146
+ # "keyword": query,
147
+ # "limit": str(limit), # Convert to string as required by the API
148
+ # "country": "US"
149
+ # }
150
+ #
151
+ # # The Actor ID for the Google Search Results Scraper
152
+ # ACTOR_ID = "563JCPLOqM1kMmbbP"
153
+ #
154
+ # print(f"Starting Apify search for: '{query}'")
155
+ #
156
+ # # Run the Actor and wait for it to finish (with timeout)
157
+ # run = client.actor(ACTOR_ID).call(run_input=run_input, timeout_secs=60)
158
+ #
159
+ # if not run or not run.get("defaultDatasetId"):
160
+ # print("Failed to get results from Apify actor")
161
+ # return fallback_search(query)
162
+ #
163
+ # # Fetch Actor results from the run's dataset
164
+ # results = []
165
+ # for item in client.dataset(run["defaultDatasetId"]).iterate_items():
166
+ # results.append(item)
167
+ #
168
+ # # Format and return the results
169
+ # return format_search_results(results, query)
170
+ #
171
+ # except Exception as e:
172
+ # print(f"Error using Apify: {str(e)}")
173
+ # return fallback_search(query)
174
 
175
  def scrape_webpage(url: str) -> str:
176
  """
 
271
  except Exception as e:
272
  return f"Error scraping webpage {url}: {str(e)}"
273
 
274
+ # Comment out the format_search_results function (around line 180)
275
+ # def format_search_results(results: List[Dict], query: str) -> str:
276
+ # """Format the search results into a readable string"""
277
+ # if not results or len(results) == 0:
278
+ # return f"No results found for query: {query}"
279
+ #
280
+ # print(f"Raw search results: {str(results)[:1000]}...")
281
+ #
282
+ # # Extract search results from the Apify output
283
+ # formatted_results = f"Search results for '{query}':\n\n"
284
+ #
285
+ # # Check if results is a list of dictionaries or a dictionary with nested results
286
+ # if isinstance(results, dict) and "results" in results:
287
+ # items = results["results"]
288
+ # elif isinstance(results, list):
289
+ # items = results
290
+ # else:
291
+ # return f"Unable to process results for query: {query}"
292
+ #
293
+ # # Handle different Apify result formats
294
+ # if len(items) > 0:
295
+ # # Check the structure of the first item to determine format
296
+ # first_item = items[0]
297
+ #
298
+ # # If item has 'organicResults', this is the format from some Apify actors
299
+ # if isinstance(first_item, dict) and "organicResults" in first_item:
300
+ # organic_results = first_item.get("organicResults", [])
301
+ # for i, result in enumerate(organic_results[:10], 1):
302
+ # if "title" in result and "url" in result:
303
+ # formatted_results += f"{i}. {result['title']}\n"
304
+ # formatted_results += f" URL: {result['url']}\n"
305
+ # if "snippet" in result:
306
+ # formatted_results += f" {result['snippet']}\n"
307
+ # formatted_results += "\n"
308
+ # else:
309
+ # # Standard format with title/url/description
310
+ # for i, result in enumerate(items[:10], 1):
311
+ # if "title" in result and "url" in result:
312
+ # formatted_results += f"{i}. {result['title']}\n"
313
+ # formatted_results += f" URL: {result['url']}\n"
314
+ # if "description" in result:
315
+ # formatted_results += f" {result['description']}\n"
316
+ # elif "snippet" in result:
317
+ # formatted_results += f" {result['snippet']}\n"
318
+ # formatted_results += "\n"
319
+ #
320
+ # return formatted_results
321
 
322
+ # Comment out the fallback_search function (around line 220)
323
+ # def fallback_search(query: str) -> str:
324
+ # """Fallback search method using DuckDuckGo when Apify is not available"""
325
+ # try:
326
+ # search_tool = DuckDuckGoSearchRun()
327
+ # result = search_tool.invoke(query)
328
+ # return "Observation: " + result
329
+ # except Exception as e:
330
+ # return f"Search error: {str(e)}. Please try a different query or method."
331
 
332
+ # Comment out the safe_web_search function (around line 230)
333
+ # def safe_web_search(query: str) -> str:
334
+ # """Search the web safely with error handling and retry logic."""
335
+ # if not query:
336
+ # return "Error: No search query provided. Please specify what you want to search for."
337
+ #
338
+ # # Try using Apify first, if it fails it will use the fallback
339
+ # return "Observation: " + apify_google_search(query)
340
+ #
341
+ # # The code below is kept for reference but won't be executed
342
+ # max_retries = 3
343
+ # backoff_factor = 1.5
344
+ #
345
+ # for attempt in range(max_retries):
346
+ # try:
347
+ # # Use the DuckDuckGoSearchRun tool
348
+ # search_tool = DuckDuckGoSearchRun()
349
+ # result = search_tool.invoke(query)
350
+ #
351
+ # # If we get an empty result, provide a helpful message
352
+ # if not result or len(result.strip()) < 10:
353
+ # return f"The search for '{query}' did not return any useful results. Please try a more specific query or a different search engine."
354
+ #
355
+ # return "Observation: " + result
356
+ #
357
+ # except Exception as e:
358
+ # # If we're being rate limited
359
+ # if "Ratelimit" in str(e) or "429" in str(e):
360
+ # if attempt < max_retries - 1:
361
+ # wait_time = backoff_factor ** attempt
362
+ # print(f"Rate limited, waiting {wait_time:.2f} seconds before retrying...")
363
+ # time.sleep(wait_time)
364
+ # else:
365
+ # # On last attempt, return a helpful error
366
+ # error_msg = f"I'm currently unable to search for '{query}' due to service rate limits. "
367
+ # return error_msg
368
+ # else:
369
+ # # For other types of errors
370
+ # return f"Error while searching for '{query}': {str(e)}"
371
+ #
372
+ # return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
373
 
374
  def wikipedia_search(query: str, num_results: int = 3) -> str:
375
  """
 
538
  except Exception as e:
539
  return f"Error searching ArXiv: {str(e)}"
540
 
541
def supabase_operation(operation_type: str, table: str, data: dict = None, filters: dict = None) -> str:
    """
    Perform a CRUD operation against a Supabase table.

    Args:
        operation_type: One of 'insert', 'select', 'update', 'delete'.
        table: Name of the table to operate on.
        data: Row data for 'insert'/'update' operations.
        filters: Equality filters (e.g. {"id": 1}) for 'select'/'update'/'delete'.

    Returns:
        A human-readable result string. Error conditions are reported as
        "Error: ..." strings rather than raised, so the agent loop can
        surface them to the model as tool observations.
    """
    try:
        # Validate inputs BEFORE any network/client setup so malformed tool
        # calls fail fast with a message about the actual problem (and not,
        # e.g., a misleading missing-credentials error).
        if not table:
            return "Error: Table name is required."

        if operation_type not in ['insert', 'select', 'update', 'delete']:
            return "Error: Invalid operation type. Use 'insert', 'select', 'update', or 'delete'."

        # Credentials come from the environment; never hard-code them.
        supabase_url = os.environ.get("SUPABASE_URL")
        supabase_key = os.environ.get("SUPABASE_ANON_KEY")

        if not supabase_url or not supabase_key:
            return "Error: Supabase credentials not found. Please set SUPABASE_URL and SUPABASE_ANON_KEY environment variables."

        # Create the Supabase client only once inputs look sane.
        supabase: Client = create_client(supabase_url, supabase_key)

        if operation_type == 'insert':
            if not data:
                return "Error: Data is required for insert operation."
            result = supabase.table(table).insert(data).execute()
            return f"Insert successful: {len(result.data)} row(s) inserted into {table}"

        elif operation_type == 'select':
            query = supabase.table(table).select("*")
            # Apply simple equality filters if provided.
            if filters:
                for key, value in filters.items():
                    query = query.eq(key, value)
            result = query.execute()
            return f"Select successful: Found {len(result.data)} row(s) in {table}\nData: {json.dumps(result.data, indent=2)}"

        elif operation_type == 'update':
            # Require filters so an update can never silently touch every row.
            if not data or not filters:
                return "Error: Both data and filters are required for update operation."
            query = supabase.table(table).update(data)
            for key, value in filters.items():
                query = query.eq(key, value)
            result = query.execute()
            return f"Update successful: {len(result.data)} row(s) updated in {table}"

        else:  # 'delete' — filters are mandatory to avoid wiping a whole table
            if not filters:
                return "Error: Filters are required for delete operation."
            query = supabase.table(table).delete()
            for key, value in filters.items():
                query = query.eq(key, value)
            result = query.execute()
            # Report the row count for consistency with insert/update.
            return f"Delete successful: {len(result.data)} row(s) deleted from {table}"

    except Exception as e:
        # Surface any client/network failure as an observation string.
        return f"Error performing Supabase operation: {str(e)}"
620
  # System prompt to guide the model's behavior
621
+ #web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
622
+ #webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
623
+ #Give preference to using Tavily Search and Wikipedia Search before using web_search or webpage_scrape. When Web_search does not return a result, use Tavily Search.
624
+
625
  SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
626
 
627
  python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
628
  wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
629
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
630
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
631
+ supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
 
 
 
 
 
 
 
 
 
 
 
632
 
633
  The way you use the tools is by specifying a json blob.
634
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
 
638
  wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
639
  tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
640
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
 
641
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
642
+ supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
643
 
644
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
645
 
646
+ Example use for Supabase:
647
 
648
+ Insert data:
649
  ```json
650
  {
651
+ "action": "supabase_operation",
652
+ "action_input": {"operation_type": "insert", "table": "users", "data": {"name": "John Doe", "email": "john@example.com"}}
653
  }
654
  ```
655
 
656
+ Select data:
 
657
  ```json
658
  {
659
+ "action": "supabase_operation",
660
+ "action_input": {"operation_type": "select", "table": "users", "filters": {"id": 1}}
661
  }
662
  ```
663
 
664
+ Update data:
665
+ ```json
666
+ {
667
+ "action": "supabase_operation",
668
+ "action_input": {"operation_type": "update", "table": "users", "data": {"name": "Jane Doe"}, "filters": {"id": 1}}
669
+ }
670
+ ```
671
 
672
+ Delete data:
673
  ```json
674
  {
675
+ "action": "supabase_operation",
676
+ "action_input": {"operation_type": "delete", "table": "users", "filters": {"id": 1}}
677
  }
678
  ```
679
 
680
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
 
681
  Question: [the user's question]
 
682
  Thought: [your reasoning about what to do next]
 
683
  Action:
684
  ```json
685
  {
 
687
  "action_input": {"[parameter_name]": "[parameter_value]"}
688
  }
689
  ```
 
690
  Observation: [the result from the tool will appear here]
 
691
  Thought: [your reasoning after seeing the observation]
 
692
  Action:
693
  ```json
694
  {
 
696
  "action_input": {"[parameter_name]": "[parameter_value]"}
697
  }
698
  ```
 
699
  Observation: [another tool result will appear here]
 
700
  IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
701
  1. First reason about the problem in the "Thought" section
702
  2. Then decide what action to take in the "Action" section (using the tools)
 
707
  NEVER fake or simulate tool output yourself.
708
 
709
  ... (this Thought/Action/Observation cycle can repeat as needed) ...
 
710
  Thought: I now know the final answer
 
711
  Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
712
  Make sure to follow any formatting instructions given by the user.
 
713
  Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
714
 
715
  # Generate the chat interface, including the tools
 
721
  chat = llm
722
  # Tools are defined but not bound to the LLM here
723
  tools_config = [
724
+ # {
725
+ # "name": "web_search",
726
+ # "description": "Search the web for current information. Provide a specific search query in the format: {\"query\": \"your search query here\"}",
727
+ # "func": safe_web_search
728
+ # },
729
  {
730
  "name": "python_code",
731
  "description": "Execute Python code. Provide the complete Python code as a string in the format: {\"code\": \"your python code here\"}",
732
  "func": run_python_code
733
  },
734
+ # {
735
+ # "name": "webpage_scrape",
736
+ # "description": "Scrape content from a specific webpage URL. Provide a valid URL in the format: {\"url\": \"https://example.com\"}",
737
+ # "func": scrape_webpage
738
+ # },
739
  {
740
  "name": "wikipedia_search",
741
  "description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
 
750
  "name": "arxiv_search",
751
  "description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
752
  "func": arxiv_search
753
+ },
754
+ {
755
+ "name": "supabase_operation",
756
+ "description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
757
+ "func": supabase_operation
758
  }
759
  ]
760
 
 
889
  print(f"Error extracting JSON: {e}")
890
  return None
891
 
892
+ # Comment out the web_search_node function
893
+ # def web_search_node(state: AgentState) -> Dict[str, Any]:
894
+ # """Node that executes the web search tool."""
895
+ # print("Web Search Tool Called...\n\n")
896
+ #
897
+ # # Extract tool arguments
898
+ # action_input = state.get("action_input", {})
899
+ # print(f"Web search action_input: {action_input}")
900
+ #
901
+ # # Try different ways to extract the query
902
+ # query = ""
903
+ # if isinstance(action_input, dict):
904
+ # query = action_input.get("query", "")
905
+ # elif isinstance(action_input, str):
906
+ # query = action_input
907
+ #
908
+ # print(f"Searching for: '{query}'")
909
+ #
910
+ # # Call the search function with retry logic
911
+ # result = safe_web_search(query)
912
+ # print(f"Search result: {result}") # Print the full result for debugging
913
+ #
914
+ # # Check if we hit rate limits and add a helpful note
915
+ # if "rate limit" in result.lower() or "ratelimit" in result.lower():
916
+ # result += "\n\nNote: You can use your internal knowledge to provide a response since the search is rate limited."
917
+ #
918
+ # # Format the observation to continue the ReAct cycle
919
+ # # Don't include "Observation:" as the assistant is stopped at this token
920
+ # observation = result
921
+ #
922
+ # # Create a tool message with the result
923
+ # tool_message = AIMessage(
924
+ # content=f"Observation: {observation}"
925
+ # )
926
+ #
927
+ # # Print the observation that will be sent back to the assistant
928
+ # print("\n=== TOOL OBSERVATION ===")
929
+ # print(tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content)
930
+ # print("=== END OBSERVATION ===\n")
931
+ #
932
+ # # Return the updated state
933
+ # return {
934
+ # "messages": state["messages"] + [tool_message],
935
+ # "current_tool": None, # Reset the current tool
936
+ # "action_input": None # Clear the action input
937
+ # }
938
 
939
  def python_code_node(state: AgentState) -> Dict[str, Any]:
940
  """Node that executes Python code."""
 
1179
  "action_input": None # Clear the action input
1180
  }
1181
 
1182
def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
    """Node that processes Supabase database operations.

    Pulls `operation_type`, `table`, and the optional `data`/`filters` out of
    the state's action_input, delegates to `supabase_operation`, and appends
    the result to the conversation as an "Observation:" message so the ReAct
    loop can continue.
    """
    print("Supabase Operation Tool Called...\n\n")

    # The assistant's parsed JSON action arguments live in the state.
    payload = state.get("action_input", {})
    print(f"Supabase operation action_input: {payload}")

    # Defaults cover the case where the payload is not a dict at all.
    op_type, table_name = "", ""
    row_data = row_filters = None
    if isinstance(payload, dict):
        op_type = payload.get("operation_type", "")
        table_name = payload.get("table", "")
        row_data = payload.get("data")
        row_filters = payload.get("filters")

    print(f"Supabase operation: {op_type} on table {table_name}")

    # Both parameters are mandatory; short-circuit with a usage hint otherwise.
    if op_type and table_name:
        result = supabase_operation(op_type, table_name, row_data, row_filters)
    else:
        result = "Error: Both operation_type and table are required. operation_type should be one of: insert, select, update, delete"

    print(f"Supabase operation result length: {len(result)}")

    # Wrap the tool output as an observation for the assistant's next turn.
    tool_message = AIMessage(content=f"Observation: {result.strip()}")

    # Print (a preview of) the observation that will be sent back to the assistant.
    print("\n=== TOOL OBSERVATION ===")
    if len(tool_message.content) > 500:
        print(tool_message.content[:500] + "...")
    else:
        print(tool_message.content)
    print("=== END OBSERVATION ===\n")

    # Hand control back to the assistant with the tool slots cleared.
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # Reset the current tool
        "action_input": None,  # Clear the action input
    }
1231
  # Router function to direct to the correct tool
1232
  def router(state: AgentState) -> str:
1233
  """Route to the appropriate tool based on the current_tool field."""
 
1236
  print(f"Routing to: {tool}")
1237
  print(f"Router received action_input: {action_input}")
1238
 
1239
+ # if tool == "web_search":
1240
+ # return "web_search"
1241
+ if tool == "python_code":
1242
  return "python_code"
1243
  elif tool == "webpage_scrape":
1244
  return "webpage_scrape"
 
1248
  return "tavily_search"
1249
  elif tool == "arxiv_search":
1250
  return "arxiv_search"
1251
+ elif tool == "supabase_operation":
1252
+ return "supabase_operation"
1253
  else:
1254
  return "end"
1255
 
 
1260
 
1261
  # Define nodes: these do the work
1262
  builder.add_node("assistant", assistant)
1263
+ # builder.add_node("web_search", web_search_node)
1264
  builder.add_node("python_code", python_code_node)
1265
  builder.add_node("webpage_scrape", webpage_scrape_node)
1266
  builder.add_node("wikipedia_search", wikipedia_search_node)
1267
  builder.add_node("tavily_search", tavily_search_node)
1268
  builder.add_node("arxiv_search", arxiv_search_node)
1269
+ builder.add_node("supabase_operation", supabase_operation_node)
1270
 
1271
  # Define edges: these determine how the control flow moves
1272
  builder.add_edge(START, "assistant")
 
1291
  "debug",
1292
  router,
1293
  {
1294
+ # "web_search": "web_search",
1295
  "python_code": "python_code",
1296
  "webpage_scrape": "webpage_scrape",
1297
  "wikipedia_search": "wikipedia_search",
1298
  "tavily_search": "tavily_search",
1299
  "arxiv_search": "arxiv_search",
1300
+ "supabase_operation": "supabase_operation",
1301
  "end": END
1302
  }
1303
  )
1304
 
1305
  # Tools always go back to assistant
1306
+ # builder.add_edge("web_search", "assistant")
1307
  builder.add_edge("python_code", "assistant")
1308
  builder.add_edge("webpage_scrape", "assistant")
1309
  builder.add_edge("wikipedia_search", "assistant")
1310
  builder.add_edge("tavily_search", "assistant")
1311
  builder.add_edge("arxiv_search", "assistant")
1312
+ builder.add_edge("supabase_operation", "assistant")
1313
 
1314
  # Compile the graph
1315
  return builder.compile()
 
1365
# Example usage:
if __name__ == "__main__":
    # Build the agent with a 60-second execution budget and run one question.
    bot = TurboNerd(max_execution_time=60)
    answer = bot("When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?")
    print("\nFinal Response:")
    print(answer)
1371
 
requirements.txt CHANGED
@@ -7,4 +7,5 @@ duckduckgo-search
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
- html2text
 
 
7
  langchain-community
8
  apify-client
9
  beautifulsoup4
10
+ html2text
11
+ supabase