Lasdw committed on
Commit
4f3da4f
·
1 Parent(s): 97d9a2e

Added Excel-to-text tool

Browse files
Files changed (2) hide show
  1. agent.py +92 -0
  2. requirements.txt +3 -1
agent.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  from dotenv import load_dotenv
3
  from typing import TypedDict, Annotated, Dict, Any, Optional, Union, List
 
4
  from langgraph.graph.message import add_messages
5
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
6
  from langgraph.prebuilt import ToolNode
@@ -21,6 +22,8 @@ from urllib.parse import quote, urlparse
21
  import sys
22
  from bs4 import BeautifulSoup
23
  import html2text
 
 
24
 
25
  from apify_client import ApifyClient
26
  from langchain_community.document_loaders import WikipediaLoader
@@ -677,6 +680,38 @@ def supabase_operation(operation_type: str, table: str, data: dict = None, filte
677
  except Exception as e:
678
  return f"Error performing Supabase operation: {str(e)}"
679
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  # System prompt to guide the model's behavior
681
  #web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
682
  #webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
@@ -689,6 +724,7 @@ wikipedia_search: Search Wikipedia for information about a specific topic. Optio
689
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
690
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
691
  supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
 
692
 
693
  The way you use the tools is by specifying a json blob.
694
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
@@ -700,6 +736,7 @@ tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_d
700
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
701
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
702
  supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
 
703
 
704
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
705
 
@@ -798,6 +835,11 @@ tools_config = [
798
  "name": "supabase_operation",
799
  "description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
800
  "func": supabase_operation
 
 
 
 
 
801
  }
802
  ]
803
 
@@ -1379,6 +1421,51 @@ def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
1379
  "action_input": None # Clear the action input
1380
  }
1381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1382
  # Router function to direct to the correct tool
1383
  def router(state: AgentState) -> str:
1384
  """Route to the appropriate tool based on the current_tool field."""
@@ -1401,6 +1488,8 @@ def router(state: AgentState) -> str:
1401
  return "arxiv_search"
1402
  elif tool == "supabase_operation":
1403
  return "supabase_operation"
 
 
1404
  else:
1405
  return "end"
1406
 
@@ -1418,6 +1507,7 @@ def create_agent_graph() -> StateGraph:
1418
  builder.add_node("tavily_search", tavily_search_node)
1419
  builder.add_node("arxiv_search", arxiv_search_node)
1420
  builder.add_node("supabase_operation", supabase_operation_node)
 
1421
 
1422
  # Define edges: these determine how the control flow moves
1423
  builder.add_edge(START, "assistant")
@@ -1449,6 +1539,7 @@ def create_agent_graph() -> StateGraph:
1449
  "tavily_search": "tavily_search",
1450
  "arxiv_search": "arxiv_search",
1451
  "supabase_operation": "supabase_operation",
 
1452
  "end": END
1453
  }
1454
  )
@@ -1461,6 +1552,7 @@ def create_agent_graph() -> StateGraph:
1461
  builder.add_edge("tavily_search", "assistant")
1462
  builder.add_edge("arxiv_search", "assistant")
1463
  builder.add_edge("supabase_operation", "assistant")
 
1464
 
1465
  # Compile the graph
1466
  return builder.compile()
 
1
  import os
2
  from dotenv import load_dotenv
3
  from typing import TypedDict, Annotated, Dict, Any, Optional, Union, List
4
+ from pathlib import Path
5
  from langgraph.graph.message import add_messages
6
  from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
7
  from langgraph.prebuilt import ToolNode
 
22
  import sys
23
  from bs4 import BeautifulSoup
24
  import html2text
25
+ import pandas as pd
26
+ from tabulate import tabulate
27
 
28
  from apify_client import ApifyClient
29
  from langchain_community.document_loaders import WikipediaLoader
 
680
  except Exception as e:
681
  return f"Error performing Supabase operation: {str(e)}"
682
 
683
def _resolve_sheet_selector(sheet_name: Union[str, int, None]) -> Union[str, int]:
    """Translate a caller-supplied sheet selector into the value pandas expects."""
    # A JSON tool call may deliver the index as a real integer rather than a
    # string; pass it through instead of calling str methods on it.
    if isinstance(sheet_name, int):
        return sheet_name
    # None or an empty string means "no preference": fall back to the first sheet.
    if not sheet_name:
        return 0
    if isinstance(sheet_name, str):
        candidate = sheet_name.strip()
        # isdecimal() only accepts characters int() can parse, unlike isdigit(),
        # which also accepts e.g. superscript digits.
        if candidate.isdecimal():
            return int(candidate)
    # Anything else is treated as a sheet name and handed to pandas unchanged.
    return sheet_name


def excel_to_text(excel_path: str, sheet_name: Optional[Union[str, int]] = None) -> str:
    """
    Read an Excel file and return a Markdown table of the requested sheet.

    Args:
        excel_path: Path to the Excel file (.xlsx or .xls). A leading "~" is expanded.
        sheet_name: Optional name or zero-based index of the sheet to read. The index
            may be given as an int or as a numeric string. If None or empty, the
            first sheet is read.

    Returns:
        A Markdown table representing the Excel sheet, or an error message string
        if the path is invalid, the file is not found, or the sheet cannot be read.
        This function reports failures through its return value rather than raising.
    """
    try:
        file_path = Path(excel_path).expanduser().resolve()
    except (TypeError, RuntimeError) as e:
        # TypeError: excel_path is not path-like; RuntimeError: "~" could not be expanded.
        return f"Error: invalid Excel file path {excel_path!r}: {e}"

    if not file_path.is_file():
        return f"Error: Excel file not found at {file_path}"

    try:
        sheet = _resolve_sheet_selector(sheet_name)
        df = pd.read_excel(file_path, sheet_name=sheet)

        # Prefer the DataFrame's own Markdown renderer when this pandas version has it.
        if hasattr(df, "to_markdown"):
            return df.to_markdown(index=False)

        # Fallback for pandas versions without DataFrame.to_markdown.
        return tabulate(df, headers="keys", tablefmt="github", showindex=False)

    except Exception as e:
        # Tool boundary: the agent expects a textual observation, never an exception.
        return f"Error reading Excel file: {e}"
714
+
715
  # System prompt to guide the model's behavior
716
  #web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
717
  #webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
 
724
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
725
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
726
  supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
727
+ excel_to_text: Read an Excel file and convert it to a Markdown table. Provide the path to the Excel file and optionally the sheet name or index.
728
 
729
  The way you use the tools is by specifying a json blob.
730
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
 
736
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
737
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
738
  supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
739
+ excel_to_text: Convert Excel to Markdown table, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
740
 
741
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
742
 
 
835
  "name": "supabase_operation",
836
  "description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
837
  "func": supabase_operation
838
+ },
839
+ {
840
+ "name": "excel_to_text",
841
+ "description": "Read an Excel file and return a Markdown table of the requested sheet. Provide the file path and optionally the sheet name or index.",
842
+ "func": excel_to_text
843
  }
844
  ]
845
 
 
1421
  "action_input": None # Clear the action input
1422
  }
1423
 
1424
def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
    """Graph node that runs the Excel-to-Markdown tool and records its observation.

    Reads ``action_input`` from the state, pulls ``excel_path`` and the optional
    ``sheet_name`` out of it when it is a dict, calls ``excel_to_text`` and appends
    the result to the conversation as an ``Observation: ...`` AIMessage.

    Returns:
        A state update with the extended message list and with ``current_tool``
        and ``action_input`` reset to None.
    """
    print("Excel to Text Tool Called...\n\n")

    # Pull the tool arguments out of the agent state
    action_input = state.get("action_input", {})
    print(f"Excel to text action_input: {action_input}")

    # Anything that is not a dict carries no usable parameters
    params = action_input if isinstance(action_input, dict) else {}
    excel_path = params.get("excel_path", "")
    sheet_name = params.get("sheet_name")

    print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}")

    # Only invoke the tool when a path was actually supplied
    if excel_path:
        result = excel_to_text(excel_path, sheet_name)
    else:
        result = "Error: Excel file path is required"

    print(f"Excel to text result length: {len(result)}")

    # Wrap the result as an observation so the ReAct loop can continue
    tool_message = AIMessage(content=f"Observation: {result.strip()}")

    # Echo a truncated copy of the observation for debugging
    observation_text = tool_message.content
    if len(observation_text) > 500:
        content_preview = observation_text[:500] + "..."
    else:
        content_preview = observation_text
    print("\n=== TOOL OBSERVATION ===")
    print(content_preview)
    print("=== END OBSERVATION ===\n")

    # Hand back the updated state with the tool bookkeeping cleared
    updated_messages = state["messages"] + [tool_message]
    return {
        "messages": updated_messages,
        "current_tool": None,  # Reset the current tool
        "action_input": None  # Clear the action input
    }
1468
+
1469
  # Router function to direct to the correct tool
1470
  def router(state: AgentState) -> str:
1471
  """Route to the appropriate tool based on the current_tool field."""
 
1488
  return "arxiv_search"
1489
  elif tool == "supabase_operation":
1490
  return "supabase_operation"
1491
+ elif tool == "excel_to_text":
1492
+ return "excel_to_text"
1493
  else:
1494
  return "end"
1495
 
 
1507
  builder.add_node("tavily_search", tavily_search_node)
1508
  builder.add_node("arxiv_search", arxiv_search_node)
1509
  builder.add_node("supabase_operation", supabase_operation_node)
1510
+ builder.add_node("excel_to_text", excel_to_text_node)
1511
 
1512
  # Define edges: these determine how the control flow moves
1513
  builder.add_edge(START, "assistant")
 
1539
  "tavily_search": "tavily_search",
1540
  "arxiv_search": "arxiv_search",
1541
  "supabase_operation": "supabase_operation",
1542
+ "excel_to_text": "excel_to_text",
1543
  "end": END
1544
  }
1545
  )
 
1552
  builder.add_edge("tavily_search", "assistant")
1553
  builder.add_edge("arxiv_search", "assistant")
1554
  builder.add_edge("supabase_operation", "assistant")
1555
+ builder.add_edge("excel_to_text", "assistant")
1556
 
1557
  # Compile the graph
1558
  return builder.compile()
requirements.txt CHANGED
@@ -8,4 +8,6 @@ langchain-community
8
  apify-client
9
  beautifulsoup4
10
  html2text
11
- supabase
 
 
 
8
  apify-client
9
  beautifulsoup4
10
  html2text
11
+ supabase
12
+ pandas
13
+ tabulate