Spaces:
Sleeping
Sleeping
Added Excel-to-text tool
Browse files- agent.py +92 -0
- requirements.txt +3 -1
agent.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from typing import TypedDict, Annotated, Dict, Any, Optional, Union, List
|
|
|
|
| 4 |
from langgraph.graph.message import add_messages
|
| 5 |
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
|
| 6 |
from langgraph.prebuilt import ToolNode
|
|
@@ -21,6 +22,8 @@ from urllib.parse import quote, urlparse
|
|
| 21 |
import sys
|
| 22 |
from bs4 import BeautifulSoup
|
| 23 |
import html2text
|
|
|
|
|
|
|
| 24 |
|
| 25 |
from apify_client import ApifyClient
|
| 26 |
from langchain_community.document_loaders import WikipediaLoader
|
|
@@ -677,6 +680,38 @@ def supabase_operation(operation_type: str, table: str, data: dict = None, filte
|
|
| 677 |
except Exception as e:
|
| 678 |
return f"Error performing Supabase operation: {str(e)}"
|
| 679 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
# System prompt to guide the model's behavior
|
| 681 |
#web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
|
| 682 |
#webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
|
|
@@ -689,6 +724,7 @@ wikipedia_search: Search Wikipedia for information about a specific topic. Optio
|
|
| 689 |
tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
|
| 690 |
arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
|
| 691 |
supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
|
|
|
|
| 692 |
|
| 693 |
The way you use the tools is by specifying a json blob.
|
| 694 |
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
|
|
@@ -700,6 +736,7 @@ tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_d
|
|
| 700 |
arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
|
| 701 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 702 |
supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
|
|
|
|
| 703 |
|
| 704 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 705 |
|
|
@@ -798,6 +835,11 @@ tools_config = [
|
|
| 798 |
"name": "supabase_operation",
|
| 799 |
"description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
|
| 800 |
"func": supabase_operation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 801 |
}
|
| 802 |
]
|
| 803 |
|
|
@@ -1379,6 +1421,51 @@ def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
|
|
| 1379 |
"action_input": None # Clear the action input
|
| 1380 |
}
|
| 1381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1382 |
# Router function to direct to the correct tool
|
| 1383 |
def router(state: AgentState) -> str:
|
| 1384 |
"""Route to the appropriate tool based on the current_tool field."""
|
|
@@ -1401,6 +1488,8 @@ def router(state: AgentState) -> str:
|
|
| 1401 |
return "arxiv_search"
|
| 1402 |
elif tool == "supabase_operation":
|
| 1403 |
return "supabase_operation"
|
|
|
|
|
|
|
| 1404 |
else:
|
| 1405 |
return "end"
|
| 1406 |
|
|
@@ -1418,6 +1507,7 @@ def create_agent_graph() -> StateGraph:
|
|
| 1418 |
builder.add_node("tavily_search", tavily_search_node)
|
| 1419 |
builder.add_node("arxiv_search", arxiv_search_node)
|
| 1420 |
builder.add_node("supabase_operation", supabase_operation_node)
|
|
|
|
| 1421 |
|
| 1422 |
# Define edges: these determine how the control flow moves
|
| 1423 |
builder.add_edge(START, "assistant")
|
|
@@ -1449,6 +1539,7 @@ def create_agent_graph() -> StateGraph:
|
|
| 1449 |
"tavily_search": "tavily_search",
|
| 1450 |
"arxiv_search": "arxiv_search",
|
| 1451 |
"supabase_operation": "supabase_operation",
|
|
|
|
| 1452 |
"end": END
|
| 1453 |
}
|
| 1454 |
)
|
|
@@ -1461,6 +1552,7 @@ def create_agent_graph() -> StateGraph:
|
|
| 1461 |
builder.add_edge("tavily_search", "assistant")
|
| 1462 |
builder.add_edge("arxiv_search", "assistant")
|
| 1463 |
builder.add_edge("supabase_operation", "assistant")
|
|
|
|
| 1464 |
|
| 1465 |
# Compile the graph
|
| 1466 |
return builder.compile()
|
|
|
|
| 1 |
import os
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from typing import TypedDict, Annotated, Dict, Any, Optional, Union, List
|
| 4 |
+
from pathlib import Path
|
| 5 |
from langgraph.graph.message import add_messages
|
| 6 |
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
|
| 7 |
from langgraph.prebuilt import ToolNode
|
|
|
|
| 22 |
import sys
|
| 23 |
from bs4 import BeautifulSoup
|
| 24 |
import html2text
|
| 25 |
+
import pandas as pd
|
| 26 |
+
from tabulate import tabulate
|
| 27 |
|
| 28 |
from apify_client import ApifyClient
|
| 29 |
from langchain_community.document_loaders import WikipediaLoader
|
|
|
|
| 680 |
except Exception as e:
|
| 681 |
return f"Error performing Supabase operation: {str(e)}"
|
| 682 |
|
| 683 |
+
def excel_to_text(excel_path: str, sheet_name: Optional[Union[str, int]] = None) -> str:
    """
    Read an Excel file and return a Markdown table of the requested sheet.

    Args:
        excel_path: Path to the Excel file (.xlsx or .xls).
        sheet_name: Optional sheet selector — a sheet name, a numeric index
            (as int or digit string), or None to read the first sheet.

    Returns:
        A Markdown table representing the Excel sheet, or an error message
        if the file is not found or cannot be read.
    """
    file_path = Path(excel_path).expanduser().resolve()
    if not file_path.is_file():
        return f"Error: Excel file not found at {file_path}"

    try:
        # Normalize the sheet selector. JSON tool arguments may deliver an
        # int directly; the original `.isdigit()` call crashed on that and
        # the broad except reported it as a generic read error.
        sheet: Union[str, int]
        if isinstance(sheet_name, int):
            sheet = sheet_name
        elif isinstance(sheet_name, str) and sheet_name.strip().isdigit():
            sheet = int(sheet_name.strip())
        else:
            sheet = sheet_name or 0

        df = pd.read_excel(file_path, sheet_name=sheet)

        # pandas >= 1.0 provides to_markdown (it delegates to tabulate);
        # fall back to calling tabulate directly on older versions.
        if hasattr(df, "to_markdown"):
            return df.to_markdown(index=False)

        return tabulate(df, headers="keys", tablefmt="github", showindex=False)

    except Exception as e:
        return f"Error reading Excel file: {e}"
|
| 714 |
+
|
| 715 |
# System prompt to guide the model's behavior
|
| 716 |
#web_search: Search the google search engine when Tavily Search and Wikipedia Search do not return a result. Provide a specific search query.
|
| 717 |
#webpage_scrape: Scrape content from a specific webpage URL when Tavily Search and Wikipedia Search do not return a result. Provide a valid URL to extract information from a particular web page.
|
|
|
|
| 724 |
tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
|
| 725 |
arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
|
| 726 |
supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
|
| 727 |
+
excel_to_text: Read an Excel file and convert it to a Markdown table. Provide the path to the Excel file and optionally the sheet name or index.
|
| 728 |
|
| 729 |
The way you use the tools is by specifying a json blob.
|
| 730 |
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
|
|
|
|
| 736 |
arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
|
| 737 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 738 |
supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
|
| 739 |
+
excel_to_text: Convert Excel to Markdown table, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
|
| 740 |
|
| 741 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 742 |
|
|
|
|
| 835 |
"name": "supabase_operation",
|
| 836 |
"description": "Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters. ",
|
| 837 |
"func": supabase_operation
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"name": "excel_to_text",
|
| 841 |
+
"description": "Read an Excel file and return a Markdown table of the requested sheet. Provide the file path and optionally the sheet name or index.",
|
| 842 |
+
"func": excel_to_text
|
| 843 |
}
|
| 844 |
]
|
| 845 |
|
|
|
|
| 1421 |
"action_input": None # Clear the action input
|
| 1422 |
}
|
| 1423 |
|
| 1424 |
+
def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
    """Node that processes Excel to Markdown table conversions.

    Reads `excel_path` and optional `sheet_name` from the state's
    `action_input`, runs the conversion, appends the observation to the
    message history, and clears the tool-routing fields so control
    returns to the assistant.
    """
    print("Excel to Text Tool Called...\n\n")

    # Extract tool arguments
    action_input = state.get("action_input", {})
    print(f"Excel to text action_input: {action_input}")

    # Extract required parameters. Accept either a dict of arguments or a
    # bare string (models sometimes emit the file path directly as
    # action_input); the original silently dropped the string form.
    excel_path = ""
    sheet_name = None

    if isinstance(action_input, dict):
        excel_path = action_input.get("excel_path", "")
        sheet_name = action_input.get("sheet_name")
    elif isinstance(action_input, str):
        excel_path = action_input

    print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}")

    # Safety check before touching the filesystem
    if not excel_path:
        result = "Error: Excel file path is required"
    else:
        # Call the Excel to text function
        result = excel_to_text(excel_path, sheet_name)

    print(f"Excel to text result length: {len(result)}")

    # Format the observation to continue the ReAct cycle
    tool_message = AIMessage(
        content=f"Observation: {result.strip()}"
    )

    # Print the observation that will be sent back to the assistant
    print("\n=== TOOL OBSERVATION ===")
    content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
    print(content_preview)
    print("=== END OBSERVATION ===\n")

    # Return the updated state
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # Reset the current tool
        "action_input": None,  # Clear the action input
    }
|
| 1468 |
+
|
| 1469 |
# Router function to direct to the correct tool
|
| 1470 |
def router(state: AgentState) -> str:
|
| 1471 |
"""Route to the appropriate tool based on the current_tool field."""
|
|
|
|
| 1488 |
return "arxiv_search"
|
| 1489 |
elif tool == "supabase_operation":
|
| 1490 |
return "supabase_operation"
|
| 1491 |
+
elif tool == "excel_to_text":
|
| 1492 |
+
return "excel_to_text"
|
| 1493 |
else:
|
| 1494 |
return "end"
|
| 1495 |
|
|
|
|
| 1507 |
builder.add_node("tavily_search", tavily_search_node)
|
| 1508 |
builder.add_node("arxiv_search", arxiv_search_node)
|
| 1509 |
builder.add_node("supabase_operation", supabase_operation_node)
|
| 1510 |
+
builder.add_node("excel_to_text", excel_to_text_node)
|
| 1511 |
|
| 1512 |
# Define edges: these determine how the control flow moves
|
| 1513 |
builder.add_edge(START, "assistant")
|
|
|
|
| 1539 |
"tavily_search": "tavily_search",
|
| 1540 |
"arxiv_search": "arxiv_search",
|
| 1541 |
"supabase_operation": "supabase_operation",
|
| 1542 |
+
"excel_to_text": "excel_to_text",
|
| 1543 |
"end": END
|
| 1544 |
}
|
| 1545 |
)
|
|
|
|
| 1552 |
builder.add_edge("tavily_search", "assistant")
|
| 1553 |
builder.add_edge("arxiv_search", "assistant")
|
| 1554 |
builder.add_edge("supabase_operation", "assistant")
|
| 1555 |
+
builder.add_edge("excel_to_text", "assistant")
|
| 1556 |
|
| 1557 |
# Compile the graph
|
| 1558 |
return builder.compile()
|
requirements.txt
CHANGED
|
@@ -8,4 +8,6 @@ langchain-community
|
|
| 8 |
apify-client
|
| 9 |
beautifulsoup4
|
| 10 |
html2text
|
| 11 |
-
supabase
|
|
|
|
|
|
|
|
|
| 8 |
apify-client
|
| 9 |
beautifulsoup4
|
| 10 |
html2text
|
| 11 |
+
supabase
|
| 12 |
+
pandas
|
| 13 |
+
tabulate
|