Commit
·
1d4199f
1
Parent(s):
0e1166a
Added excel parser
Browse files- agent.py +31 -3
- requirements.txt +4 -1
agent.py
CHANGED
|
@@ -20,6 +20,8 @@ from langchain_community.document_loaders import PythonLoader
|
|
| 20 |
from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
|
| 21 |
from langchain_core.documents.base import Blob
|
| 22 |
|
|
|
|
|
|
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
|
|
@@ -118,6 +120,29 @@ def python_file_reader(file_name: str) -> str:
|
|
| 118 |
return result
|
| 119 |
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
|
| 122 |
def audio_to_text(audio_file_name: str) -> str:
|
| 123 |
"""Listen to audio and extract text from speech
|
|
@@ -160,6 +185,7 @@ tools = [
|
|
| 160 |
python_file_reader,
|
| 161 |
audio_to_text,
|
| 162 |
wikipedia_search,
|
|
|
|
| 163 |
]
|
| 164 |
|
| 165 |
|
|
@@ -192,7 +218,7 @@ def create_graph():
|
|
| 192 |
llm_with_tools = llm.bind_tools(tools)
|
| 193 |
|
| 194 |
# System message
|
| 195 |
-
original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 196 |
system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 197 |
|
| 198 |
sys_msg = SystemMessage(system_prompt_txt)
|
|
@@ -276,10 +302,12 @@ if __name__ == "__main__":
|
|
| 276 |
messages = graph.invoke({"messages": messages})
|
| 277 |
for m in messages["messages"]:
|
| 278 |
m.pretty_print()
|
| 279 |
-
"""
|
| 280 |
|
| 281 |
-
|
|
|
|
|
|
|
| 282 |
messages = [HumanMessage(content=question)]
|
| 283 |
messages = graph.invoke({"messages": messages})
|
| 284 |
for m in messages["messages"]:
|
| 285 |
m.pretty_print()
|
|
|
|
|
|
| 20 |
from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
|
| 21 |
from langchain_core.documents.base import Blob
|
| 22 |
|
| 23 |
+
# excel
|
| 24 |
+
from langchain_community.document_loaders import UnstructuredExcelLoader
|
| 25 |
|
| 26 |
load_dotenv()
|
| 27 |
|
|
|
|
| 120 |
return result
|
| 121 |
|
| 122 |
|
| 123 |
+
# https://python.langchain.com/docs/integrations/document_loaders/microsoft_excel/
|
| 124 |
+
# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.excel.UnstructuredExcelLoader.html
|
| 125 |
+
def excel_file_reader(excel_file_name: str) -> str:
    """Reads an excel file and returns the content

    Every element parsed from the workbook is wrapped in a <Document source="..."> tag and the elements are separated by "---" lines.

    Args:
        excel_file_name: the filename to read (looked up in the "files" directory next to this module)

    Returns:
        A single string containing the formatted content of the workbook.
    """
    # NOTE(review): this function is listed in `tools`; presumably its
    # docstring doubles as the tool description shown to the LLM, so the
    # Google-style "Args:" layout is kept intact - confirm before reformatting.
    file_path = os.path.join(os.path.dirname(__file__), "files", excel_file_name)
    loader = UnstructuredExcelLoader(file_path, mode="elements")
    documents = loader.load()

    formatted_docs = []
    for next_doc in documents:
        # Prefer the HTML rendering when the element provides one; fall back
        # to the plain text so an element without "text_as_html" in its
        # metadata no longer aborts the whole read with a KeyError.
        body = next_doc.metadata.get("text_as_html") or next_doc.page_content
        formatted_docs.append(
            f'<Document source="{excel_file_name}">\n{body}\n</Document>'
        )

    # Put the separator *between* documents (it used to be emitted only once,
    # in front of the first document).
    formatted_search_docs = "\n\n---\n\n".join(formatted_docs)

    # Label the payload as spreadsheet content; the former "python_code" label
    # did not describe what this function returns.
    return f"{{excel_file_content: {formatted_search_docs}}}"
|
| 144 |
+
|
| 145 |
+
|
| 146 |
# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
|
| 147 |
def audio_to_text(audio_file_name: str) -> str:
|
| 148 |
"""Listen to audio and extract text from speech
|
|
|
|
| 185 |
python_file_reader,
|
| 186 |
audio_to_text,
|
| 187 |
wikipedia_search,
|
| 188 |
+
excel_file_reader,
|
| 189 |
]
|
| 190 |
|
| 191 |
|
|
|
|
| 218 |
llm_with_tools = llm.bind_tools(tools)
|
| 219 |
|
| 220 |
# System message
|
| 221 |
+
# original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 222 |
system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 223 |
|
| 224 |
sys_msg = SystemMessage(system_prompt_txt)
|
|
|
|
| 302 |
messages = graph.invoke({"messages": messages})
|
| 303 |
for m in messages["messages"]:
|
| 304 |
m.pretty_print()
|
|
|
|
| 305 |
|
| 306 |
+
|
| 307 |
+
print("******** EXCEL TOOL ********")
|
| 308 |
+
question = "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places. File to use is 7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"
|
| 309 |
messages = [HumanMessage(content=question)]
|
| 310 |
messages = graph.invoke({"messages": messages})
|
| 311 |
for m in messages["messages"]:
|
| 312 |
m.pretty_print()
|
| 313 |
+
"""
|
requirements.txt
CHANGED
|
@@ -8,4 +8,7 @@ langchain-community
|
|
| 8 |
langchain-openai
|
| 9 |
langgraph-cli[inmem]
|
| 10 |
wikipedia
|
| 11 |
-
tavily-python
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
langchain-openai
|
| 9 |
langgraph-cli[inmem]
|
| 10 |
wikipedia
|
| 11 |
+
tavily-python
|
| 12 |
+
unstructured
|
| 13 |
+
openpyxl
|
| 14 |
+
networkx
|