Commit ·
0e1166a
1
Parent(s): 5dce464
50% tuned prompt and other stuff
Browse files
agent.py
CHANGED
|
@@ -16,6 +16,11 @@ from langchain_community.tools.tavily_search import TavilySearchResults
|
|
| 16 |
# Python loader
|
| 17 |
from langchain_community.document_loaders import PythonLoader
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
load_dotenv()
|
| 20 |
|
| 21 |
|
|
@@ -33,6 +38,8 @@ def meaning_of_life(a: int, b: int) -> int:
|
|
| 33 |
|
| 34 |
# https://www.restack.io/docs/langchain-knowledge-wikipedia-loader-cat-ai
|
| 35 |
# https://api.python.langchain.com/en/latest/community/document_loaders/langchain_community.document_loaders.wikipedia.WikipediaLoader.html#
|
|
|
|
|
|
|
| 36 |
def wikipedia_search(query: str) -> str:
|
| 37 |
"""Searches Wikipedia for a given query and fetches full document
|
| 38 |
|
|
@@ -49,7 +56,7 @@ def wikipedia_search(query: str) -> str:
|
|
| 49 |
formatted_search_docs = "\n\n---\n\n"
|
| 50 |
|
| 51 |
for next_doc in documents:
|
| 52 |
-
formatted_doc = f'<Document source="{next_doc.metadata["source"]}" title="{next_doc.metadata.get("title", "")}"
|
| 53 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 54 |
|
| 55 |
result = f"{{wiki_results: {formatted_search_docs}}}"
|
|
@@ -79,7 +86,7 @@ def web_search(query: str) -> str:
|
|
| 79 |
title = next_doc["title"]
|
| 80 |
content = next_doc["content"]
|
| 81 |
formatted_doc = (
|
| 82 |
-
f'<Document source="{url}" title="{title}"
|
| 83 |
)
|
| 84 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 85 |
|
|
@@ -102,7 +109,7 @@ def python_file_reader(file_name: str) -> str:
|
|
| 102 |
|
| 103 |
for next_doc in documents:
|
| 104 |
formatted_doc = (
|
| 105 |
-
f'<Document source="{file_name}"
|
| 106 |
)
|
| 107 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 108 |
|
|
@@ -111,7 +118,49 @@ def python_file_reader(file_name: str) -> str:
|
|
| 111 |
return result
|
| 112 |
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
# --- GRAPH ---
|
|
@@ -133,7 +182,7 @@ def create_graph():
|
|
| 133 |
llm = AzureChatOpenAI(
|
| 134 |
azure_deployment=deployment,
|
| 135 |
api_version=api_version,
|
| 136 |
-
temperature=0,
|
| 137 |
max_tokens=None,
|
| 138 |
timeout=None,
|
| 139 |
max_retries=2,
|
|
@@ -144,7 +193,7 @@ def create_graph():
|
|
| 144 |
|
| 145 |
# System message
|
| 146 |
original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 147 |
-
system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 148 |
|
| 149 |
sys_msg = SystemMessage(system_prompt_txt)
|
| 150 |
|
|
@@ -211,7 +260,7 @@ if __name__ == "__main__":
|
|
| 211 |
messages = graph.invoke({"messages": messages})
|
| 212 |
for m in messages["messages"]:
|
| 213 |
m.pretty_print()
|
| 214 |
-
|
| 215 |
|
| 216 |
print("******** PYTHON LOAD TOOL ********")
|
| 217 |
question = "what does this python code do? filename is f918266a-b3e0-4914-865d-4faa564f1aef.py"
|
|
@@ -219,3 +268,18 @@ if __name__ == "__main__":
|
|
| 219 |
messages = graph.invoke({"messages": messages})
|
| 220 |
for m in messages["messages"]:
|
| 221 |
m.pretty_print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Python loader
|
| 17 |
from langchain_community.document_loaders import PythonLoader
|
| 18 |
|
| 19 |
+
# Whisper
|
| 20 |
+
from langchain_community.document_loaders.parsers.audio import AzureOpenAIWhisperParser
|
| 21 |
+
from langchain_core.documents.base import Blob
|
| 22 |
+
|
| 23 |
+
|
| 24 |
load_dotenv()
|
| 25 |
|
| 26 |
|
|
|
|
| 38 |
|
| 39 |
# https://www.restack.io/docs/langchain-knowledge-wikipedia-loader-cat-ai
|
| 40 |
# https://api.python.langchain.com/en/latest/community/document_loaders/langchain_community.document_loaders.wikipedia.WikipediaLoader.html#
|
| 41 |
+
# NOTE: I ended up not using this tool, since I could not get it to return the table data needed for the Markov question. The Tavily search tool also finds Wikipedia content.
|
| 42 |
+
# A better approach could be to combine this tool (to get the URL) with a web reader (to fetch the page content).
|
| 43 |
def wikipedia_search(query: str) -> str:
|
| 44 |
"""Searches Wikipedia for a given query and fetches full document
|
| 45 |
|
|
|
|
| 56 |
formatted_search_docs = "\n\n---\n\n"
|
| 57 |
|
| 58 |
for next_doc in documents:
|
| 59 |
+
formatted_doc = f'<Document source="{next_doc.metadata["source"]}" title="{next_doc.metadata.get("title", "")}"\n{next_doc.page_content}\n</Document>'
|
| 60 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 61 |
|
| 62 |
result = f"{{wiki_results: {formatted_search_docs}}}"
|
|
|
|
| 86 |
title = next_doc["title"]
|
| 87 |
content = next_doc["content"]
|
| 88 |
formatted_doc = (
|
| 89 |
+
f'<Document source="{url}" title="{title}"\n{content}\n</Document>'
|
| 90 |
)
|
| 91 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 92 |
|
|
|
|
| 109 |
|
| 110 |
for next_doc in documents:
|
| 111 |
formatted_doc = (
|
| 112 |
+
f'<Document source="{file_name}"\n{next_doc.page_content}\n</Document>'
|
| 113 |
)
|
| 114 |
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 115 |
|
|
|
|
| 118 |
return result
|
| 119 |
|
| 120 |
|
| 121 |
+
# https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.parsers.audio.AzureOpenAIWhisperParser.html
|
| 122 |
+
def audio_to_text(audio_file_name: str) -> str:
|
| 123 |
+
"""Listen to audio and extract text from speech
|
| 124 |
+
|
| 125 |
+
Args:
|
| 126 |
+
audio_file_name: the audio filename to read
|
| 127 |
+
"""
|
| 128 |
+
file_path = os.path.join(os.path.dirname(__file__), "files", audio_file_name)
|
| 129 |
+
|
| 130 |
+
deployment_name = os.environ.get("AZURE_WHISPER_DEPLOYMENT")
|
| 131 |
+
api_version = os.environ.get("AZURE_WHISPER_API_VERSION")
|
| 132 |
+
api_key = os.environ.get("AZURE_WHISPER_API_KEY")
|
| 133 |
+
azure_endpoint = os.environ.get("AZURE_WHISPER_ENDPOINT")
|
| 134 |
+
|
| 135 |
+
whisper_parser = AzureOpenAIWhisperParser(
|
| 136 |
+
deployment_name=deployment_name,
|
| 137 |
+
api_version=api_version,
|
| 138 |
+
api_key=api_key,
|
| 139 |
+
azure_endpoint=azure_endpoint,
|
| 140 |
+
# other params...
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
audio_blob = Blob(path=file_path)
|
| 144 |
+
response = whisper_parser.parse(audio_blob)
|
| 145 |
+
|
| 146 |
+
formatted_search_docs = "\n\n---\n\n"
|
| 147 |
+
|
| 148 |
+
for next_doc in response:
|
| 149 |
+
formatted_doc = f'<Document source="{audio_file_name}"\n{next_doc.page_content}\n</Document>'
|
| 150 |
+
formatted_search_docs = formatted_search_docs + formatted_doc
|
| 151 |
+
|
| 152 |
+
result = f"{{transscribed_audio: {formatted_search_docs}}}"
|
| 153 |
+
|
| 154 |
+
return result
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
tools = [
|
| 158 |
+
meaning_of_life,
|
| 159 |
+
web_search,
|
| 160 |
+
python_file_reader,
|
| 161 |
+
audio_to_text,
|
| 162 |
+
wikipedia_search,
|
| 163 |
+
]
|
| 164 |
|
| 165 |
|
| 166 |
# --- GRAPH ---
|
|
|
|
| 182 |
llm = AzureChatOpenAI(
|
| 183 |
azure_deployment=deployment,
|
| 184 |
api_version=api_version,
|
| 185 |
+
temperature=0.01,
|
| 186 |
max_tokens=None,
|
| 187 |
timeout=None,
|
| 188 |
max_retries=2,
|
|
|
|
| 193 |
|
| 194 |
# System message
|
| 195 |
original_system_prompt_txt = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 196 |
+
system_prompt_txt = "You are a general AI assistant that uses tools to answer questions. YOUR FINAL ANSWER should be a number represented as digits OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number or how many, only reply with a number represented as digits nothing else, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for an abbreviation or a code only reply with that. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
|
| 197 |
|
| 198 |
sys_msg = SystemMessage(system_prompt_txt)
|
| 199 |
|
|
|
|
| 260 |
messages = graph.invoke({"messages": messages})
|
| 261 |
for m in messages["messages"]:
|
| 262 |
m.pretty_print()
|
| 263 |
+
|
| 264 |
|
| 265 |
print("******** PYTHON LOAD TOOL ********")
|
| 266 |
question = "what does this python code do? filename is f918266a-b3e0-4914-865d-4faa564f1aef.py"
|
|
|
|
| 268 |
messages = graph.invoke({"messages": messages})
|
| 269 |
for m in messages["messages"]:
|
| 270 |
m.pretty_print()
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
print("******** TRANSSCRIBE AUDIO TOOL ********")
|
| 274 |
+
question = "Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :( Could you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order. File to use is 1f975693-876d-457b-a649-393859e79bf3.mp3"
|
| 275 |
+
messages = [HumanMessage(content=question)]
|
| 276 |
+
messages = graph.invoke({"messages": messages})
|
| 277 |
+
for m in messages["messages"]:
|
| 278 |
+
m.pretty_print()
|
| 279 |
+
"""
|
| 280 |
+
|
| 281 |
+
question = "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
|
| 282 |
+
messages = [HumanMessage(content=question)]
|
| 283 |
+
messages = graph.invoke({"messages": messages})
|
| 284 |
+
for m in messages["messages"]:
|
| 285 |
+
m.pretty_print()
|