Spaces:
Sleeping
Sleeping
Added attachment processing
Browse files
agent.py
CHANGED
|
@@ -24,11 +24,12 @@ from bs4 import BeautifulSoup
|
|
| 24 |
import html2text
|
| 25 |
import pandas as pd
|
| 26 |
from tabulate import tabulate
|
|
|
|
| 27 |
|
| 28 |
from apify_client import ApifyClient
|
| 29 |
from langchain_community.document_loaders import WikipediaLoader
|
| 30 |
from langchain_community.document_loaders import ArxivLoader
|
| 31 |
-
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 32 |
from supabase import create_client, Client
|
| 33 |
|
| 34 |
load_dotenv()
|
|
@@ -680,22 +681,35 @@ def supabase_operation(operation_type: str, table: str, data: dict = None, filte
|
|
| 680 |
except Exception as e:
|
| 681 |
return f"Error performing Supabase operation: {str(e)}"
|
| 682 |
|
| 683 |
-
def excel_to_text(excel_path: str, sheet_name: Optional[str] = None) -> str:
|
| 684 |
"""
|
| 685 |
Read an Excel file and return a Markdown table of the requested sheet.
|
| 686 |
|
| 687 |
Args:
|
| 688 |
-
excel_path: Path to the Excel file (.xlsx or .xls).
|
| 689 |
sheet_name: Optional name or index of the sheet to read. If None, reads the first sheet.
|
|
|
|
| 690 |
|
| 691 |
Returns:
|
| 692 |
A Markdown table representing the Excel sheet, or an error message if the file is not found or cannot be read.
|
| 693 |
"""
|
| 694 |
-
file_path = Path(excel_path).expanduser().resolve()
|
| 695 |
-
if not file_path.is_file():
|
| 696 |
-
return f"Error: Excel file not found at {file_path}"
|
| 697 |
-
|
| 698 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
sheet: Union[str, int] = (
|
| 700 |
int(sheet_name)
|
| 701 |
if sheet_name and sheet_name.isdigit()
|
|
@@ -704,12 +718,21 @@ def excel_to_text(excel_path: str, sheet_name: Optional[str] = None) -> str:
|
|
| 704 |
|
| 705 |
df = pd.read_excel(file_path, sheet_name=sheet)
|
| 706 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
if hasattr(df, "to_markdown"):
|
| 708 |
return df.to_markdown(index=False)
|
| 709 |
|
| 710 |
return tabulate(df, headers="keys", tablefmt="github", showindex=False)
|
| 711 |
|
| 712 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
return f"Error reading Excel file: {e}"
|
| 714 |
|
| 715 |
# System prompt to guide the model's behavior
|
|
@@ -724,7 +747,7 @@ wikipedia_search: Search Wikipedia for information about a specific topic. Optio
|
|
| 724 |
tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
|
| 725 |
arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
|
| 726 |
supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
|
| 727 |
-
excel_to_text: Read an Excel file and convert it to a Markdown table.
|
| 728 |
|
| 729 |
The way you use the tools is by specifying a json blob.
|
| 730 |
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
|
|
@@ -736,7 +759,8 @@ tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_d
|
|
| 736 |
arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
|
| 737 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 738 |
supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
|
| 739 |
-
excel_to_text: Convert Excel to Markdown table, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
|
|
|
|
| 740 |
|
| 741 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 742 |
|
|
@@ -755,6 +779,13 @@ or
|
|
| 755 |
"action_input": {"code": "c = a + b"}
|
| 756 |
}
|
| 757 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
|
| 759 |
ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
|
| 760 |
Question: [the user's question]
|
|
@@ -838,7 +869,7 @@ tools_config = [
|
|
| 838 |
},
|
| 839 |
{
|
| 840 |
"name": "excel_to_text",
|
| 841 |
-
"description": "Read an Excel file and return a Markdown table
|
| 842 |
"func": excel_to_text
|
| 843 |
}
|
| 844 |
]
|
|
@@ -862,6 +893,7 @@ class AgentState(TypedDict, total=False):
|
|
| 862 |
current_tool: Optional[str]
|
| 863 |
action_input: Optional[ActionInput]
|
| 864 |
iteration_count: int # Added to track iterations
|
|
|
|
| 865 |
# tool_call_id: Optional[str] # Ensure this is present if used by your graph logic for tools
|
| 866 |
|
| 867 |
# Add prune_messages_for_llm function
|
|
@@ -1432,19 +1464,35 @@ def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
|
|
| 1432 |
# Extract required parameters
|
| 1433 |
excel_path = ""
|
| 1434 |
sheet_name = None
|
|
|
|
| 1435 |
|
| 1436 |
if isinstance(action_input, dict):
|
| 1437 |
excel_path = action_input.get("excel_path", "")
|
| 1438 |
sheet_name = action_input.get("sheet_name")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1439 |
|
| 1440 |
-
print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}")
|
| 1441 |
|
| 1442 |
# Safety check
|
| 1443 |
-
if not excel_path:
|
| 1444 |
-
result = "Error: Excel file path is required"
|
| 1445 |
else:
|
| 1446 |
# Call the Excel to text function
|
| 1447 |
-
result = excel_to_text(excel_path, sheet_name)
|
| 1448 |
|
| 1449 |
print(f"Excel to text result length: {len(result)}")
|
| 1450 |
|
|
@@ -1569,18 +1617,40 @@ class TurboNerd:
|
|
| 1569 |
os.environ["APIFY_API_TOKEN"] = apify_api_token
|
| 1570 |
print("Apify API token set successfully")
|
| 1571 |
|
| 1572 |
-
def __call__(self, question: str) -> str:
|
| 1573 |
-
"""
|
| 1574 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1575 |
initial_state = {
|
| 1576 |
-
"messages": [HumanMessage(content=f"Question: {
|
| 1577 |
"current_tool": None,
|
| 1578 |
"action_input": None,
|
| 1579 |
-
"iteration_count": 0 # Initialize iteration_count
|
|
|
|
| 1580 |
}
|
| 1581 |
|
| 1582 |
# Run the graph
|
| 1583 |
print(f"Starting graph execution with question: {question}")
|
|
|
|
|
|
|
| 1584 |
|
| 1585 |
try:
|
| 1586 |
# Set a reasonable recursion limit based on max_iterations
|
|
@@ -1616,3 +1686,30 @@ I need to make headings for the fruits and vegetables. Could you please create a
|
|
| 1616 |
print("\nFinal Response:")
|
| 1617 |
print(response)
|
| 1618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
import html2text
|
| 25 |
import pandas as pd
|
| 26 |
from tabulate import tabulate
|
| 27 |
+
import base64
|
| 28 |
|
| 29 |
from apify_client import ApifyClient
|
| 30 |
from langchain_community.document_loaders import WikipediaLoader
|
| 31 |
from langchain_community.document_loaders import ArxivLoader
|
| 32 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 33 |
from supabase import create_client, Client
|
| 34 |
|
| 35 |
load_dotenv()
|
|
|
|
| 681 |
except Exception as e:
|
| 682 |
return f"Error performing Supabase operation: {str(e)}"
|
| 683 |
|
| 684 |
+
def excel_to_text(excel_path: str, sheet_name: Optional[str] = None, file_content: Optional[bytes] = None) -> str:
|
| 685 |
"""
|
| 686 |
Read an Excel file and return a Markdown table of the requested sheet.
|
| 687 |
|
| 688 |
Args:
|
| 689 |
+
excel_path: Path to the Excel file (.xlsx or .xls) or name for the attached file.
|
| 690 |
sheet_name: Optional name or index of the sheet to read. If None, reads the first sheet.
|
| 691 |
+
file_content: Optional binary content of the file if provided as an attachment.
|
| 692 |
|
| 693 |
Returns:
|
| 694 |
A Markdown table representing the Excel sheet, or an error message if the file is not found or cannot be read.
|
| 695 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
try:
|
| 697 |
+
# Handle file attachment case
|
| 698 |
+
if file_content:
|
| 699 |
+
# Create a temporary file to save the attachment
|
| 700 |
+
with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as temp_file:
|
| 701 |
+
temp_file.write(file_content)
|
| 702 |
+
temp_path = temp_file.name
|
| 703 |
+
|
| 704 |
+
print(f"Saved attached Excel file to temporary location: {temp_path}")
|
| 705 |
+
file_path = Path(temp_path)
|
| 706 |
+
else:
|
| 707 |
+
# Regular file path case
|
| 708 |
+
file_path = Path(excel_path).expanduser().resolve()
|
| 709 |
+
if not file_path.is_file():
|
| 710 |
+
return f"Error: Excel file not found at {file_path}"
|
| 711 |
+
|
| 712 |
+
# Process the Excel file
|
| 713 |
sheet: Union[str, int] = (
|
| 714 |
int(sheet_name)
|
| 715 |
if sheet_name and sheet_name.isdigit()
|
|
|
|
| 718 |
|
| 719 |
df = pd.read_excel(file_path, sheet_name=sheet)
|
| 720 |
|
| 721 |
+
# Clean up temporary file if we created one
|
| 722 |
+
if file_content and os.path.exists(temp_path):
|
| 723 |
+
os.unlink(temp_path)
|
| 724 |
+
print(f"Deleted temporary Excel file: {temp_path}")
|
| 725 |
+
|
| 726 |
if hasattr(df, "to_markdown"):
|
| 727 |
return df.to_markdown(index=False)
|
| 728 |
|
| 729 |
return tabulate(df, headers="keys", tablefmt="github", showindex=False)
|
| 730 |
|
| 731 |
except Exception as e:
|
| 732 |
+
# Clean up temporary file in case of error
|
| 733 |
+
if file_content and 'temp_path' in locals() and os.path.exists(temp_path):
|
| 734 |
+
os.unlink(temp_path)
|
| 735 |
+
print(f"Deleted temporary Excel file due to error: {temp_path}")
|
| 736 |
return f"Error reading Excel file: {e}"
|
| 737 |
|
| 738 |
# System prompt to guide the model's behavior
|
|
|
|
| 747 |
tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
|
| 748 |
arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
|
| 749 |
supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
|
| 750 |
+
excel_to_text: Read an Excel file and convert it to a Markdown table. You can provide either the path to an Excel file or use a file attachment. For attachments, provide a base64-encoded string of the file content and a filename.
|
| 751 |
|
| 752 |
The way you use the tools is by specifying a json blob.
|
| 753 |
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
|
|
|
|
| 759 |
arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
|
| 760 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 761 |
supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
|
| 762 |
+
excel_to_text: Convert Excel to Markdown table with file path, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
|
| 763 |
+
excel_to_text: Convert Excel to Markdown table with attachment, args: {"excel_path": {"type": "string"}, "file_content": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
|
| 764 |
|
| 765 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 766 |
|
|
|
|
| 779 |
"action_input": {"code": "c = a + b"}
|
| 780 |
}
|
| 781 |
```
|
| 782 |
+
or
|
| 783 |
+
```json
|
| 784 |
+
{
|
| 785 |
+
"action": "excel_to_text",
|
| 786 |
+
"action_input": {"excel_path": "data.xlsx", "file_content": "BASE64_ENCODED_CONTENT_HERE", "sheet_name": "Sheet1"}
|
| 787 |
+
}
|
| 788 |
+
```
|
| 789 |
|
| 790 |
ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
|
| 791 |
Question: [the user's question]
|
|
|
|
| 869 |
},
|
| 870 |
{
|
| 871 |
"name": "excel_to_text",
|
| 872 |
+
"description": "Read an Excel file and return a Markdown table. You can provide either the path to an Excel file or use a file attachment. For attachments, provide a base64-encoded string of the file content and a filename.",
|
| 873 |
"func": excel_to_text
|
| 874 |
}
|
| 875 |
]
|
|
|
|
| 893 |
current_tool: Optional[str]
|
| 894 |
action_input: Optional[ActionInput]
|
| 895 |
iteration_count: int # Added to track iterations
|
| 896 |
+
attachments: Dict[str, str] # Added to store file attachments (filename -> base64 content)
|
| 897 |
# tool_call_id: Optional[str] # Ensure this is present if used by your graph logic for tools
|
| 898 |
|
| 899 |
# Add prune_messages_for_llm function
|
|
|
|
| 1464 |
# Extract required parameters
|
| 1465 |
excel_path = ""
|
| 1466 |
sheet_name = None
|
| 1467 |
+
file_content = None
|
| 1468 |
|
| 1469 |
if isinstance(action_input, dict):
|
| 1470 |
excel_path = action_input.get("excel_path", "")
|
| 1471 |
sheet_name = action_input.get("sheet_name")
|
| 1472 |
+
|
| 1473 |
+
# Check if there's attached file content (base64 encoded) directly in the action_input
|
| 1474 |
+
if "file_content" in action_input:
|
| 1475 |
+
try:
|
| 1476 |
+
file_content = base64.b64decode(action_input["file_content"])
|
| 1477 |
+
print(f"Decoded attached file content, size: {len(file_content)} bytes")
|
| 1478 |
+
except Exception as e:
|
| 1479 |
+
print(f"Error decoding file content: {e}")
|
| 1480 |
+
# Check if we should use a file from the attachments dictionary
|
| 1481 |
+
elif excel_path and "attachments" in state and excel_path in state["attachments"]:
|
| 1482 |
+
try:
|
| 1483 |
+
file_content = base64.b64decode(state["attachments"][excel_path])
|
| 1484 |
+
print(f"Using attachment '{excel_path}' from state, size: {len(file_content)} bytes")
|
| 1485 |
+
except Exception as e:
|
| 1486 |
+
print(f"Error using attachment {excel_path}: {e}")
|
| 1487 |
|
| 1488 |
+
print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}, has_attachment={file_content is not None}")
|
| 1489 |
|
| 1490 |
# Safety check
|
| 1491 |
+
if not excel_path and not file_content:
|
| 1492 |
+
result = "Error: Either Excel file path or file content is required"
|
| 1493 |
else:
|
| 1494 |
# Call the Excel to text function
|
| 1495 |
+
result = excel_to_text(excel_path, sheet_name, file_content)
|
| 1496 |
|
| 1497 |
print(f"Excel to text result length: {len(result)}")
|
| 1498 |
|
|
|
|
| 1617 |
os.environ["APIFY_API_TOKEN"] = apify_api_token
|
| 1618 |
print("Apify API token set successfully")
|
| 1619 |
|
| 1620 |
+
def __call__(self, question: str, attachments: dict = None) -> str:
|
| 1621 |
+
"""
|
| 1622 |
+
Process a question and return an answer.
|
| 1623 |
+
|
| 1624 |
+
Args:
|
| 1625 |
+
question: The user's question text
|
| 1626 |
+
attachments: Optional dictionary of attachments with keys as names and values as base64-encoded content
|
| 1627 |
+
"""
|
| 1628 |
+
# Process attachments if provided
|
| 1629 |
+
attachment_info = ""
|
| 1630 |
+
if attachments and isinstance(attachments, dict) and len(attachments) > 0:
|
| 1631 |
+
attachment_names = list(attachments.keys())
|
| 1632 |
+
attachment_info = f"\n\nI've attached the following files: {', '.join(attachment_names)}. "
|
| 1633 |
+
|
| 1634 |
+
# Add different instructions based on detected file types
|
| 1635 |
+
excel_files = [name for name in attachment_names if name.endswith(('.xlsx', '.xls'))]
|
| 1636 |
+
if excel_files:
|
| 1637 |
+
attachment_info += f"Use the excel_to_text tool with the file_content parameter to process the Excel files."
|
| 1638 |
+
|
| 1639 |
+
# Initialize the state with the question and attachment info
|
| 1640 |
+
question_with_attachments = question + attachment_info if attachment_info else question
|
| 1641 |
+
|
| 1642 |
initial_state = {
|
| 1643 |
+
"messages": [HumanMessage(content=f"Question: {question_with_attachments}")],
|
| 1644 |
"current_tool": None,
|
| 1645 |
"action_input": None,
|
| 1646 |
+
"iteration_count": 0, # Initialize iteration_count
|
| 1647 |
+
"attachments": attachments or {} # Store attachments in the state
|
| 1648 |
}
|
| 1649 |
|
| 1650 |
# Run the graph
|
| 1651 |
print(f"Starting graph execution with question: {question}")
|
| 1652 |
+
if attachments:
|
| 1653 |
+
print(f"Included attachments: {list(attachments.keys())}")
|
| 1654 |
|
| 1655 |
try:
|
| 1656 |
# Set a reasonable recursion limit based on max_iterations
|
|
|
|
| 1686 |
print("\nFinal Response:")
|
| 1687 |
print(response)
|
| 1688 |
|
| 1689 |
+
def save_attachment_to_tempfile(file_content_b64: str, file_extension: str = '.xlsx') -> Optional[str]:
    """
    Decode base64 file content and save it to a temporary file.

    The file is created with ``delete=False``, so it outlives this call;
    the caller is responsible for removing it once it is no longer needed.

    Args:
        file_content_b64: Base64 encoded file content
        file_extension: File extension (suffix) to use for the temporary file

    Returns:
        Path to the saved temporary file, or None if the content could not
        be decoded or the file could not be written.
    """
    # Local import keeps this helper self-contained regardless of which
    # modules the top of the file pulls in.
    import tempfile

    # Decode first so that bad input never creates a file on disk.
    try:
        file_content = base64.b64decode(file_content_b64)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad padding / alphabet) is a ValueError subclass;
        # TypeError covers non-string input such as None.
        print(f"Error saving attachment: {e}")
        return None

    temp_path = None
    try:
        # Create a temporary file with the appropriate extension
        with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(file_content)
    except (OSError, TypeError) as e:
        # delete=False means a failed write would otherwise leave an
        # orphaned file behind, so remove it on a best-effort basis.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        print(f"Error saving attachment: {e}")
        return None

    print(f"Saved attachment to temporary file: {temp_path}")
    return temp_path
|
| 1715 |
+
|