Lasdw committed on
Commit
4a2c0eb
·
1 Parent(s): 4f3da4f

Added attachment processing

Browse files
Files changed (1) hide show
  1. agent.py +116 -19
agent.py CHANGED
@@ -24,11 +24,12 @@ from bs4 import BeautifulSoup
24
  import html2text
25
  import pandas as pd
26
  from tabulate import tabulate
 
27
 
28
  from apify_client import ApifyClient
29
  from langchain_community.document_loaders import WikipediaLoader
30
  from langchain_community.document_loaders import ArxivLoader
31
- from langchain_community.tools.tavily_search import TavilySearchResults # For Tavily search
32
  from supabase import create_client, Client
33
 
34
  load_dotenv()
@@ -680,22 +681,35 @@ def supabase_operation(operation_type: str, table: str, data: dict = None, filte
680
  except Exception as e:
681
  return f"Error performing Supabase operation: {str(e)}"
682
 
683
- def excel_to_text(excel_path: str, sheet_name: Optional[str] = None) -> str:
684
  """
685
  Read an Excel file and return a Markdown table of the requested sheet.
686
 
687
  Args:
688
- excel_path: Path to the Excel file (.xlsx or .xls).
689
  sheet_name: Optional name or index of the sheet to read. If None, reads the first sheet.
 
690
 
691
  Returns:
692
  A Markdown table representing the Excel sheet, or an error message if the file is not found or cannot be read.
693
  """
694
- file_path = Path(excel_path).expanduser().resolve()
695
- if not file_path.is_file():
696
- return f"Error: Excel file not found at {file_path}"
697
-
698
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  sheet: Union[str, int] = (
700
  int(sheet_name)
701
  if sheet_name and sheet_name.isdigit()
@@ -704,12 +718,21 @@ def excel_to_text(excel_path: str, sheet_name: Optional[str] = None) -> str:
704
 
705
  df = pd.read_excel(file_path, sheet_name=sheet)
706
 
 
 
 
 
 
707
  if hasattr(df, "to_markdown"):
708
  return df.to_markdown(index=False)
709
 
710
  return tabulate(df, headers="keys", tablefmt="github", showindex=False)
711
 
712
  except Exception as e:
 
 
 
 
713
  return f"Error reading Excel file: {e}"
714
 
715
  # System prompt to guide the model's behavior
@@ -724,7 +747,7 @@ wikipedia_search: Search Wikipedia for information about a specific topic. Optio
724
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
725
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
726
  supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
727
- excel_to_text: Read an Excel file and convert it to a Markdown table. Provide the path to the Excel file and optionally the sheet name or index.
728
 
729
  The way you use the tools is by specifying a json blob.
730
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
@@ -736,7 +759,8 @@ tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_d
736
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
737
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
738
  supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
739
- excel_to_text: Convert Excel to Markdown table, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
 
740
 
741
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
742
 
@@ -755,6 +779,13 @@ or
755
  "action_input": {"code": "c = a + b"}
756
  }
757
  ```
 
 
 
 
 
 
 
758
 
759
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
760
  Question: [the user's question]
@@ -838,7 +869,7 @@ tools_config = [
838
  },
839
  {
840
  "name": "excel_to_text",
841
- "description": "Read an Excel file and return a Markdown table of the requested sheet. Provide the file path and optionally the sheet name or index.",
842
  "func": excel_to_text
843
  }
844
  ]
@@ -862,6 +893,7 @@ class AgentState(TypedDict, total=False):
862
  current_tool: Optional[str]
863
  action_input: Optional[ActionInput]
864
  iteration_count: int # Added to track iterations
 
865
  # tool_call_id: Optional[str] # Ensure this is present if used by your graph logic for tools
866
 
867
  # Add prune_messages_for_llm function
@@ -1432,19 +1464,35 @@ def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
1432
  # Extract required parameters
1433
  excel_path = ""
1434
  sheet_name = None
 
1435
 
1436
  if isinstance(action_input, dict):
1437
  excel_path = action_input.get("excel_path", "")
1438
  sheet_name = action_input.get("sheet_name")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1439
 
1440
- print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}")
1441
 
1442
  # Safety check
1443
- if not excel_path:
1444
- result = "Error: Excel file path is required"
1445
  else:
1446
  # Call the Excel to text function
1447
- result = excel_to_text(excel_path, sheet_name)
1448
 
1449
  print(f"Excel to text result length: {len(result)}")
1450
 
@@ -1569,18 +1617,40 @@ class TurboNerd:
1569
  os.environ["APIFY_API_TOKEN"] = apify_api_token
1570
  print("Apify API token set successfully")
1571
 
1572
- def __call__(self, question: str) -> str:
1573
- """Process a question and return an answer."""
1574
- # Initialize the state with the question
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1575
  initial_state = {
1576
- "messages": [HumanMessage(content=f"Question: {question}")],
1577
  "current_tool": None,
1578
  "action_input": None,
1579
- "iteration_count": 0 # Initialize iteration_count
 
1580
  }
1581
 
1582
  # Run the graph
1583
  print(f"Starting graph execution with question: {question}")
 
 
1584
 
1585
  try:
1586
  # Set a reasonable recursion limit based on max_iterations
@@ -1616,3 +1686,30 @@ I need to make headings for the fruits and vegetables. Could you please create a
1616
  print("\nFinal Response:")
1617
  print(response)
1618
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  import html2text
25
  import pandas as pd
26
  from tabulate import tabulate
27
+ import base64
28
 
29
  from apify_client import ApifyClient
30
  from langchain_community.document_loaders import WikipediaLoader
31
  from langchain_community.document_loaders import ArxivLoader
32
+ from langchain_community.tools.tavily_search import TavilySearchResults
33
  from supabase import create_client, Client
34
 
35
  load_dotenv()
 
681
  except Exception as e:
682
  return f"Error performing Supabase operation: {str(e)}"
683
 
684
+ def excel_to_text(excel_path: str, sheet_name: Optional[str] = None, file_content: Optional[bytes] = None) -> str:
685
  """
686
  Read an Excel file and return a Markdown table of the requested sheet.
687
 
688
  Args:
689
+ excel_path: Path to the Excel file (.xlsx or .xls) or name for the attached file.
690
  sheet_name: Optional name or index of the sheet to read. If None, reads the first sheet.
691
+ file_content: Optional binary content of the file if provided as an attachment.
692
 
693
  Returns:
694
  A Markdown table representing the Excel sheet, or an error message if the file is not found or cannot be read.
695
  """
 
 
 
 
696
  try:
697
+ # Handle file attachment case
698
+ if file_content:
699
+ # Create a temporary file to save the attachment
700
+ with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as temp_file:
701
+ temp_file.write(file_content)
702
+ temp_path = temp_file.name
703
+
704
+ print(f"Saved attached Excel file to temporary location: {temp_path}")
705
+ file_path = Path(temp_path)
706
+ else:
707
+ # Regular file path case
708
+ file_path = Path(excel_path).expanduser().resolve()
709
+ if not file_path.is_file():
710
+ return f"Error: Excel file not found at {file_path}"
711
+
712
+ # Process the Excel file
713
  sheet: Union[str, int] = (
714
  int(sheet_name)
715
  if sheet_name and sheet_name.isdigit()
 
718
 
719
  df = pd.read_excel(file_path, sheet_name=sheet)
720
 
721
+ # Clean up temporary file if we created one
722
+ if file_content and os.path.exists(temp_path):
723
+ os.unlink(temp_path)
724
+ print(f"Deleted temporary Excel file: {temp_path}")
725
+
726
  if hasattr(df, "to_markdown"):
727
  return df.to_markdown(index=False)
728
 
729
  return tabulate(df, headers="keys", tablefmt="github", showindex=False)
730
 
731
  except Exception as e:
732
+ # Clean up temporary file in case of error
733
+ if file_content and 'temp_path' in locals() and os.path.exists(temp_path):
734
+ os.unlink(temp_path)
735
+ print(f"Deleted temporary Excel file due to error: {temp_path}")
736
  return f"Error reading Excel file: {e}"
737
 
738
  # System prompt to guide the model's behavior
 
747
  tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
748
  arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
749
  supabase_operation: Perform database operations on Supabase (insert, select, update, delete). Provide operation_type, table name, and optional data/filters.
750
+ excel_to_text: Read an Excel file and convert it to a Markdown table. You can provide either the path to an Excel file or use a file attachment. For attachments, provide a base64-encoded string of the file content and a filename.
751
 
752
  The way you use the tools is by specifying a json blob.
753
  Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
 
759
  arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
760
  webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
761
  supabase_operation: Perform database operations, args: {"operation_type": {"type": "string"}, "table": {"type": "string"}, "data": {"type": "object", "optional": true}, "filters": {"type": "object", "optional": true}}
762
+ excel_to_text: Convert Excel to Markdown table with file path, args: {"excel_path": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
763
+ excel_to_text: Convert Excel to Markdown table with attachment, args: {"excel_path": {"type": "string"}, "file_content": {"type": "string"}, "sheet_name": {"type": "string", "optional": true}}
764
 
765
  IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
766
 
 
779
  "action_input": {"code": "c = a + b"}
780
  }
781
  ```
782
+ or
783
+ ```json
784
+ {
785
+ "action": "excel_to_text",
786
+ "action_input": {"excel_path": "data.xlsx", "file_content": "BASE64_ENCODED_CONTENT_HERE", "sheet_name": "Sheet1"}
787
+ }
788
+ ```
789
 
790
  ALWAYS follow this specific format for your responses. Your entire response will follow this pattern:
791
  Question: [the user's question]
 
869
  },
870
  {
871
  "name": "excel_to_text",
872
+ "description": "Read an Excel file and return a Markdown table. You can provide either the path to an Excel file or use a file attachment. For attachments, provide a base64-encoded string of the file content and a filename.",
873
  "func": excel_to_text
874
  }
875
  ]
 
893
  current_tool: Optional[str]
894
  action_input: Optional[ActionInput]
895
  iteration_count: int # Added to track iterations
896
+ attachments: Dict[str, str] # Added to store file attachments (filename -> base64 content)
897
  # tool_call_id: Optional[str] # Ensure this is present if used by your graph logic for tools
898
 
899
  # Add prune_messages_for_llm function
 
1464
  # Extract required parameters
1465
  excel_path = ""
1466
  sheet_name = None
1467
+ file_content = None
1468
 
1469
  if isinstance(action_input, dict):
1470
  excel_path = action_input.get("excel_path", "")
1471
  sheet_name = action_input.get("sheet_name")
1472
+
1473
+ # Check if there's attached file content (base64 encoded) directly in the action_input
1474
+ if "file_content" in action_input:
1475
+ try:
1476
+ file_content = base64.b64decode(action_input["file_content"])
1477
+ print(f"Decoded attached file content, size: {len(file_content)} bytes")
1478
+ except Exception as e:
1479
+ print(f"Error decoding file content: {e}")
1480
+ # Check if we should use a file from the attachments dictionary
1481
+ elif excel_path and "attachments" in state and excel_path in state["attachments"]:
1482
+ try:
1483
+ file_content = base64.b64decode(state["attachments"][excel_path])
1484
+ print(f"Using attachment '{excel_path}' from state, size: {len(file_content)} bytes")
1485
+ except Exception as e:
1486
+ print(f"Error using attachment {excel_path}: {e}")
1487
 
1488
+ print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}, has_attachment={file_content is not None}")
1489
 
1490
  # Safety check
1491
+ if not excel_path and not file_content:
1492
+ result = "Error: Either Excel file path or file content is required"
1493
  else:
1494
  # Call the Excel to text function
1495
+ result = excel_to_text(excel_path, sheet_name, file_content)
1496
 
1497
  print(f"Excel to text result length: {len(result)}")
1498
 
 
1617
  os.environ["APIFY_API_TOKEN"] = apify_api_token
1618
  print("Apify API token set successfully")
1619
 
1620
+ def __call__(self, question: str, attachments: dict = None) -> str:
1621
+ """
1622
+ Process a question and return an answer.
1623
+
1624
+ Args:
1625
+ question: The user's question text
1626
+ attachments: Optional dictionary of attachments with keys as names and values as base64-encoded content
1627
+ """
1628
+ # Process attachments if provided
1629
+ attachment_info = ""
1630
+ if attachments and isinstance(attachments, dict) and len(attachments) > 0:
1631
+ attachment_names = list(attachments.keys())
1632
+ attachment_info = f"\n\nI've attached the following files: {', '.join(attachment_names)}. "
1633
+
1634
+ # Add different instructions based on detected file types
1635
+ excel_files = [name for name in attachment_names if name.endswith(('.xlsx', '.xls'))]
1636
+ if excel_files:
1637
+ attachment_info += f"Use the excel_to_text tool with the file_content parameter to process the Excel files."
1638
+
1639
+ # Initialize the state with the question and attachment info
1640
+ question_with_attachments = question + attachment_info if attachment_info else question
1641
+
1642
  initial_state = {
1643
+ "messages": [HumanMessage(content=f"Question: {question_with_attachments}")],
1644
  "current_tool": None,
1645
  "action_input": None,
1646
+ "iteration_count": 0, # Initialize iteration_count
1647
+ "attachments": attachments or {} # Store attachments in the state
1648
  }
1649
 
1650
  # Run the graph
1651
  print(f"Starting graph execution with question: {question}")
1652
+ if attachments:
1653
+ print(f"Included attachments: {list(attachments.keys())}")
1654
 
1655
  try:
1656
  # Set a reasonable recursion limit based on max_iterations
 
1686
  print("\nFinal Response:")
1687
  print(response)
1688
 
1689
def save_attachment_to_tempfile(file_content_b64: str, file_extension: str = '.xlsx') -> Optional[str]:
    """
    Decode base64 file content and save it to a temporary file.

    The temporary file is created with ``delete=False``, so it stays on disk
    after this function returns; the caller is responsible for removing it
    once it is no longer needed.

    Args:
        file_content_b64: Base64 encoded file content
        file_extension: File extension (suffix) to use for the temporary file

    Returns:
        Path to the saved temporary file, or None if decoding or writing failed.
    """
    # Tracked outside the try block so the error path can remove a file that
    # was created but not completely written.
    temp_path = None
    try:
        # Decode the base64 content
        file_content = base64.b64decode(file_content_b64)

        # Create a temporary file with the appropriate extension
        with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(file_content)

        print(f"Saved attachment to temporary file: {temp_path}")
        return temp_path

    except Exception as e:
        # Best-effort boundary: callers get None instead of an exception.
        print(f"Error saving attachment: {e}")
        if temp_path is not None:
            # Do not leave a partially written file behind.
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        return None
1715
+