sqfoo committed on
Commit
21595fa
·
verified ·
1 Parent(s): 6d176e2

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +4 -5
agent.py CHANGED
@@ -94,6 +94,7 @@ def image_caption(dir: str) -> str:
94
  return metadata[0].page_content
95
 
96
  # 2. Coding
 
97
  # 3. Multi-Modality
98
 
99
  # ("human", f"Question: {question}\nReport to validate: {final_answer}")
@@ -123,7 +124,8 @@ class BasicAgent:
123
  - youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
124
  - read_file: read the content of the attached file by passing the file directory as input
125
  - image_caption: understand the visual content of the attached image by passing the image directory as input
126
-
 
127
  HERE are some examples illustrating how and what tools to call.
128
  ---------------
129
  TASK: "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
@@ -132,16 +134,13 @@ class BasicAgent:
132
  TASK: How many Grammy Awards that Taylor Swift has won.
133
  ACTION: Call the web_search tools with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
134
 
135
- TASK: Count how many people in this image.
136
- ACTION: Call the image_caption tool by passing the image directory as input. Then, use LLM to understand the image caption and answer the question.
137
-
138
  TASK: How much the total expense in this spreadsheet?
139
  ACTION: Call the read_file tool to extract the content of the provided spreadfile. Then, use LLM to extract the amount of every expense and sum them up.
140
 
141
  TASK: How many All England Title that Lee Chong Wei won?
142
  ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant row of All England Title and count how many rows is there.
143
  """
144
- self.tools = [web_search, visit_webpage, wiki_search, youtube_transcript, read_file, image_caption]
145
  self.prompt = ChatPromptTemplate.from_messages([
146
  ("system", self.sys_prompt),
147
  ("human", "{input}")
 
94
  return metadata[0].page_content
95
 
96
  # 2. Coding
97
+ from langchain_experimental.tools import PythonREPLTool
98
  # 3. Multi-Modality
99
 
100
  # ("human", f"Question: {question}\nReport to validate: {final_answer}")
 
124
  - youtube_transcript: fetch the transcript of the Youtube video by passing the video url as input if the question asks for watching a Youtube video
125
  - read_file: read the content of the attached file by passing the file directory as input
126
  - image_caption: understand the visual content of the attached image by passing the image directory as input
127
+ - PythonREPLTool: run the python code
128
+
129
  HERE are some examples illustrating how and what tools to call.
130
  ---------------
131
  TASK: "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
 
134
  TASK: How many Grammy Awards that Taylor Swift has won.
135
  ACTION: Call the web_search tools with the query: 'how many Grammy Awards that Taylor Swift has won.' to extract the answer.
136
 
 
 
 
137
  TASK: How much the total expense in this spreadsheet?
138
  ACTION: Call the read_file tool to extract the content of the provided spreadfile. Then, use LLM to extract the amount of every expense and sum them up.
139
 
140
  TASK: How many All England Title that Lee Chong Wei won?
141
  ACTION: Call wiki_search with the query: "Lee Chong Wei". Extract the relevant row of All England Title and count how many rows is there.
142
  """
143
+ elf.tools = [web_search, visit_webpage, wiki_search, youtube_transcript, read_file, image_caption, PythonREPLTool()]
144
  self.prompt = ChatPromptTemplate.from_messages([
145
  ("system", self.sys_prompt),
146
  ("human", "{input}")