Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -94,6 +94,7 @@ def image_caption(dir: str) -> str:
|
|
| 94 |
return metadata[0].page_content
|
| 95 |
|
| 96 |
# 2. Coding
|
|
|
|
| 97 |
# 3. Multi-Modality
|
| 98 |
|
| 99 |
# ("human", f"Question: {question}\nReport to validate: {final_answer}")
|
|
@@ -123,7 +124,8 @@ class BasicAgent:
|
|
| 123 |
- youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
|
| 124 |
- read_file: read the content of the attached file by passing the file directory as input
|
| 125 |
- image_caption: understand the visual content of the attached image by passing the image directory as input
|
| 126 |
-
|
|
|
|
| 127 |
HERE are some examples illustrating how and what tools to call.
|
| 128 |
---------------
|
| 129 |
TASK: "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
|
@@ -132,16 +134,13 @@ class BasicAgent:
|
|
| 132 |
TASK: How many Grammy Awards that Taylor Swift has won.
|
| 133 |
ACTION: Call the web_search tool with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
|
| 134 |
|
| 135 |
-
TASK: Count how many people in this image.
|
| 136 |
-
ACTION: Call the image_caption tool by passing the image directory as input. Then, use LLM to understand the image caption and answer the question.
|
| 137 |
-
|
| 138 |
TASK: How much the total expense in this spreadsheet?
|
| 139 |
ACTION: Call the read_file tool to extract the content of the provided spreadsheet. Then, use the LLM to extract the amount of every expense and sum them up.
|
| 140 |
|
| 141 |
TASK: How many All England Title that Lee Chong Wei won?
|
| 142 |
ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant rows for the All England Title and count how many rows there are.
|
| 143 |
"""
|
| 144 |
-
|
| 145 |
self.prompt = ChatPromptTemplate.from_messages([
|
| 146 |
("system", self.sys_prompt),
|
| 147 |
("human", "{input}")
|
|
|
|
| 94 |
return metadata[0].page_content
|
| 95 |
|
| 96 |
# 2. Coding
|
| 97 |
+
from langchain_experimental.tools import PythonREPLTool
|
| 98 |
# 3. Multi-Modality
|
| 99 |
|
| 100 |
# ("human", f"Question: {question}\nReport to validate: {final_answer}")
|
|
|
|
| 124 |
- youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
|
| 125 |
- read_file: read the content of the attached file by passing the file directory as input
|
| 126 |
- image_caption: understand the visual content of the attached image by passing the image directory as input
|
| 127 |
+
- PythonREPLTool: run Python code by passing the code as input
|
| 128 |
+
|
| 129 |
HERE are some examples illustrating how and what tools to call.
|
| 130 |
---------------
|
| 131 |
TASK: "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
|
|
|
| 134 |
TASK: How many Grammy Awards that Taylor Swift has won.
|
| 135 |
ACTION: Call the web_search tool with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
|
| 136 |
|
|
|
|
|
|
|
|
|
|
| 137 |
TASK: How much the total expense in this spreadsheet?
|
| 138 |
ACTION: Call the read_file tool to extract the content of the provided spreadsheet. Then, use the LLM to extract the amount of every expense and sum them up.
|
| 139 |
|
| 140 |
TASK: How many All England Title that Lee Chong Wei won?
|
| 141 |
ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant rows for the All England Title and count how many rows there are.
|
| 142 |
"""
|
| 143 |
+
self.tools = [web_search, visit_webpage, wiki_search, youtube_transcript, read_file, image_caption, PythonREPLTool()]
|
| 144 |
self.prompt = ChatPromptTemplate.from_messages([
|
| 145 |
("system", self.sys_prompt),
|
| 146 |
("human", "{input}")
|