Thanh Vinh Vo committed on
Commit
dc4500c
·
1 Parent(s): ff4161a
Files changed (1) hide show
  1. app.py +8 -45
app.py CHANGED
@@ -184,34 +184,6 @@ class BasicAgent:
184
  self.multimodal_agent = CodeAgent(
185
  tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
186
  model= OpenAIServerModel(model_id="gpt-4o"),
187
- additional_authorized_imports=[
188
- "requests",
189
- "bs4",
190
- "pandas",
191
- "io",
192
- "PIL",
193
- "chess",
194
- "img2text",
195
- "PIL.Image",
196
- "bytes",
197
- "cv2",
198
- "numpy",
199
- "json",
200
- "whisper",
201
- "openpyxl"
202
- ],
203
- name="multimodal_agent",
204
- description="""
205
- This agent can reason across audio, vision, and text, a.k.a multimodal agent. """,
206
- verbosity_level=0,
207
- max_steps=10,
208
- )
209
-
210
- self.code_agent = CodeAgent(
211
- tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text, extract_table_from_html],
212
- model=InferenceClientModel(
213
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
214
- ),
215
  additional_authorized_imports=[
216
  "requests",
217
  "bs4",
@@ -228,23 +200,16 @@ class BasicAgent:
228
  "cv2",
229
  "numpy",
230
  "chess.engine",
231
- "json",
232
  "whisper",
233
  "openpyxl"
 
234
  ],
235
- name="code_agent",
236
  description="""
237
- This agent specializes at:
238
  - Writing code to solve problem.
239
  - Browse the web to find information.
240
- - Solving chess problems.
241
- This agent follow rules below when possible:
242
- 1. `wikipedia` Python package is provided to interact with Wikipedia pages.
243
- 2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
244
- 2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
245
- 3. Please take the question literally! Do not add any additional information or assumptions.
246
-
247
- """,
248
  verbosity_level=0,
249
  max_steps=10,
250
  )
@@ -255,8 +220,7 @@ class BasicAgent:
255
  ),
256
  tools=[get_file, audio_to_text],
257
  managed_agents=[
258
- self.multimodal_agent,
259
- self.code_agent],
260
  additional_authorized_imports=[
261
  "requests",
262
  "bs4",
@@ -278,7 +242,7 @@ class BasicAgent:
278
  "json",
279
  ],
280
  planning_interval=5,
281
- max_steps=15,
282
  )
283
 
284
  def __call__(self, question: str, question_id: str, file_name: str) -> str:
@@ -288,9 +252,8 @@ class BasicAgent:
288
  Answer the following question (question_id is {question_id}):):
289
  "{question}""{file}"
290
  Follow below rules when possible:
291
- 1. `wikipedia` Python package is provided to interact with Wikipedia pages.
292
- 2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
293
- 3. Please take the question literally! Do not add any additional information or assumptions.
294
  """
295
  result = self.manager_agent.run(prompt)
296
  print(f"Agent responded with: {result}")
 
184
  self.multimodal_agent = CodeAgent(
185
  tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
186
  model= OpenAIServerModel(model_id="gpt-4o"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  additional_authorized_imports=[
188
  "requests",
189
  "bs4",
 
200
  "cv2",
201
  "numpy",
202
  "chess.engine",
 
203
  "whisper",
204
  "openpyxl"
205
+ "json",
206
  ],
207
+ name="multimodal_agent",
208
  description="""
209
+ This is a powerful agent, it specializes in:
210
  - Writing code to solve problem.
211
  - Browse the web to find information.
212
+ - Reason across audio, vision, and text, a.k.a multimodal agent. """,
 
 
 
 
 
 
 
213
  verbosity_level=0,
214
  max_steps=10,
215
  )
 
220
  ),
221
  tools=[get_file, audio_to_text],
222
  managed_agents=[
223
+ self.multimodal_agent],
 
224
  additional_authorized_imports=[
225
  "requests",
226
  "bs4",
 
242
  "json",
243
  ],
244
  planning_interval=5,
245
+ max_steps=10,
246
  )
247
 
248
  def __call__(self, question: str, question_id: str, file_name: str) -> str:
 
252
  Answer the following question (question_id is {question_id}):):
253
  "{question}""{file}"
254
  Follow below rules when possible:
255
+ 1. Please take the question literally! Do not add any additional information or assumptions.
256
+ 2. Please answer as concisely as possible.
 
257
  """
258
  result = self.manager_agent.run(prompt)
259
  print(f"Agent responded with: {result}")