Final_Assignment_Template

Sleeping

App Files Files Community

HollowVoice committed on May 2, 2025

Commit

1d4199f

1 Parent(s): 0e1166a

Added excel parser

Browse files

Files changed (2) hide show

agent.py +31 -3
requirements.txt +4 -1

agent.py CHANGED Viewed

@@ -20,6 +20,8 @@ from langchain_community.document_loaders import PythonLoader
 from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
 from langchain_core.documents.base import Blob
 load_dotenv()
@@ -118,6 +120,29 @@ def python_file_reader(file_name: str) -> str:
     return result
 # https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
 def audio_to_text(audio_file_name: str) -> str:
     """Listen to audio and extract text from speech
@@ -160,6 +185,7 @@ tools = [
     python_file_reader,
     audio_to_text,
     wikipedia_search,
 ]
@@ -192,7 +218,7 @@ def create_graph():
     llm_with_tools = llm.bind_tools(tools)
     # System message
-    original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
     system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
     sys_msg = SystemMessage(system_prompt_txt)
@@ -276,10 +302,12 @@ if __name__ == "__main__":
     messages = graph.invoke({"messages": messages})
     for m in messages["messages"]:
         m.pretty_print()
-"""
-    question = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
     messages = [HumanMessage(content=question)]
     messages = graph.invoke({"messages": messages})
     for m in messages["messages"]:
         m.pretty_print()

 from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
 from langchain_core.documents.base import Blob
+# excel
+from langchain_community.document_loaders import UnstructuredExcelLoader
 load_dotenv()
     return result
+# https://python.langchain.com/docs/integrations/document_loaders/microsoft_excel/
+# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.excel.UnstructuredExcelLoader.html
+def excel_file_reader(excel_file_name: str) -> str:
+    """Reads an excel file and returns the content
+    Args:
+        excel_file_name: the filename to read
+    """
+    file_path = os.path.join(os.path.dirname(__file__), "files", excel_file_name)
+    loader = UnstructuredExcelLoader(file_path, mode="elements")
+    documents = loader.load()
+    formatted_search_docs = "\n\n---\n\n"
+    for next_doc in documents:
+        formatted_doc = f'<Document source="{excel_file_name}"\n{next_doc.metadata["text_as_html"]}\n</Document>'
+        formatted_search_docs = formatted_search_docs + formatted_doc
+    result = f"{{python_code: {formatted_search_docs}}}"
+    return result
 # https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
 def audio_to_text(audio_file_name: str) -> str:
     """Listen to audio and extract text from speech
     python_file_reader,
     audio_to_text,
     wikipedia_search,
+    excel_file_reader,
 ]
     llm_with_tools = llm.bind_tools(tools)
     # System message
+    # original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
     system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
     sys_msg = SystemMessage(system_prompt_txt)
     messages = graph.invoke({"messages": messages})
     for m in messages["messages"]:
         m.pretty_print()
+    print("******** EXCEL TOOL ********")
+    question = "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places. File to use is 7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
     messages = [HumanMessage(content=question)]
     messages = graph.invoke({"messages": messages})
     for m in messages["messages"]:
         m.pretty_print()
+"""

requirements.txt CHANGED Viewed

@@ -8,4 +8,7 @@ langchain-community
 langchain-openai
 langgraph-cli[inmem]
 wikipedia
-tavily-python

 langchain-openai
 langgraph-cli[inmem]
 wikipedia
+tavily-python
+unstructured
+openpyxl
+networkx