HollowVoice committed on
Commit
1d4199f
·
1 Parent(s): 0e1166a

Added Excel parser

Browse files
Files changed (2) hide show
  1. agent.py +31 -3
  2. requirements.txt +4 -1
agent.py CHANGED
@@ -20,6 +20,8 @@ from langchain_community.document_loaders import PythonLoader
20
  from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
21
  from langchain_core.documents.base import Blob
22
 
 
 
23
 
24
  load_dotenv()
25
 
@@ -118,6 +120,29 @@ def python_file_reader(file_name: str) -> str:
118
  return result
119
 
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
122
  def audio_to_text(audio_file_name: str) -> str:
123
  """Listen to audio and extract text from speech
@@ -160,6 +185,7 @@ tools = [
160
  python_file_reader,
161
  audio_to_text,
162
  wikipedia_search,
 
163
  ]
164
 
165
 
@@ -192,7 +218,7 @@ def create_graph():
192
  llm_with_tools = llm.bind_tools(tools)
193
 
194
  # System message
195
- original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
196
  system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
197
 
198
  sys_msg = SystemMessage(system_prompt_txt)
@@ -276,10 +302,12 @@ if __name__ == "__main__":
276
  messages = graph.invoke({"messages": messages})
277
  for m in messages["messages"]:
278
  m.pretty_print()
279
- """
280
 
281
- question = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
 
 
282
  messages = [HumanMessage(content=question)]
283
  messages = graph.invoke({"messages": messages})
284
  for m in messages["messages"]:
285
  m.pretty_print()
 
 
20
  from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
21
  from langchain_core.documents.base import Blob
22
 
23
+ # excel
24
+ from langchain_community.document_loaders import UnstructuredExcelLoader
25
 
26
  load_dotenv()
27
 
 
120
  return result
121
 
122
 
123
+ # https://python.langchain.com/docs/integrations/document_loaders/microsoft_excel/
124
+ # https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.excel.UnstructuredExcelLoader.html
125
+ def excel_file_reader(excel_file_name: str) -> str:
126
+ """Reads an excel file and returns the content
127
+
128
+ Args:
129
+ excel_file_name: the filename to read
130
+ """
131
+ file_path = os.path.join(os.path.dirname(__file__), "files", excel_file_name)
132
+ loader = UnstructuredExcelLoader(file_path, mode="elements")
133
+
134
+ documents = loader.load()
135
+ formatted_search_docs = "\n\n---\n\n"
136
+
137
+ for next_doc in documents:
138
+ formatted_doc = f'<Document source="{excel_file_name}"\n{next_doc.metadata["text_as_html"]}\n</Document>'
139
+ formatted_search_docs = formatted_search_docs + formatted_doc
140
+
141
+ result = f"{{python_code: {formatted_search_docs}}}"
142
+
143
+ return result
144
+
145
+
146
  # https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
147
  def audio_to_text(audio_file_name: str) -> str:
148
  """Listen to audio and extract text from speech
 
185
  python_file_reader,
186
  audio_to_text,
187
  wikipedia_search,
188
+ excel_file_reader,
189
  ]
190
 
191
 
 
218
  llm_with_tools = llm.bind_tools(tools)
219
 
220
  # System message
221
+ # original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
222
  system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
223
 
224
  sys_msg = SystemMessage(system_prompt_txt)
 
302
  messages = graph.invoke({"messages": messages})
303
  for m in messages["messages"]:
304
  m.pretty_print()
 
305
 
306
+
307
+ print("******** EXCEL TOOL ********")
308
+ question = "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places. File to use is 7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
309
  messages = [HumanMessage(content=question)]
310
  messages = graph.invoke({"messages": messages})
311
  for m in messages["messages"]:
312
  m.pretty_print()
313
+ """
requirements.txt CHANGED
@@ -8,4 +8,7 @@ langchain-community
8
  langchain-openai
9
  langgraph-cli[inmem]
10
  wikipedia
11
- tavily-python
 
 
 
 
8
  langchain-openai
9
  langgraph-cli[inmem]
10
  wikipedia
11
+ tavily-python
12
+ unstructured
13
+ openpyxl
14
+ networkx