Final_Project_Agent_Course

Sleeping

App Files Files Community

Thanh Vinh Vo committed on Jul 9, 2025

Commit

dc4500c

1 Parent(s): ff4161a

update

Browse files

Files changed (1) hide show

app.py +8 -45

app.py CHANGED Viewed

@@ -184,34 +184,6 @@ class BasicAgent:
         self.multimodal_agent = CodeAgent(
             tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
             model= OpenAIServerModel(model_id="gpt-4o"),
-            additional_authorized_imports=[
-                "requests",
-                "bs4",
-                "pandas",
-                "io",
-                "PIL",
-                "chess",
-                "img2text",
-                "PIL.Image",
-                "bytes",
-                "cv2",
-                "numpy",
-                "json",
-                "whisper",
-                "openpyxl"
-            ],
-            name="multimodal_agent",
-            description="""
-                This agent can reason across audio, vision, and text, a.k.a multimodal agent. """,
-            verbosity_level=0,
-            max_steps=10,
-        )
-        self.code_agent = CodeAgent(
-            tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text, extract_table_from_html],
-            model=InferenceClientModel(
-                model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-            ),
             additional_authorized_imports=[
                 "requests",
                 "bs4",
@@ -228,23 +200,16 @@ class BasicAgent:
                 "cv2",
                 "numpy",
                 "chess.engine",
-                "json",
                 "whisper",
                 "openpyxl"
             ],
-            name="code_agent",
             description="""
-                This agent specializes at:
                     - Writing code to solve problem.
                     - Browse the web to find information.
-                    - Solving chess problems.
-                This agent follow rules below when possible:
-                    1. `wikipedia` Python package is provided to interact with Wikipedia pages.
-                    2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
-                    2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
-                    3. Please take the question literally! Do not add any additional information or assumptions.
-            """,
             verbosity_level=0,
             max_steps=10,
         )
@@ -255,8 +220,7 @@ class BasicAgent:
             ),
             tools=[get_file, audio_to_text],
             managed_agents=[
-                self.multimodal_agent,
-                self.code_agent],
             additional_authorized_imports=[
                 "requests",
                 "bs4",
@@ -278,7 +242,7 @@ class BasicAgent:
                 "json",
             ],
             planning_interval=5,
-            max_steps=15,
         )
     def __call__(self, question: str, question_id: str, file_name: str) -> str:
@@ -288,9 +252,8 @@ class BasicAgent:
             Answer the following question (question_id is {question_id}):):
                "{question}""{file}"
             Follow below rules when possible:
-                1. `wikipedia` Python package is provided to interact with Wikipedia pages.
-                2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
-                3. Please take the question literally! Do not add any additional information or assumptions.
         """
         result = self.manager_agent.run(prompt)
         print(f"Agent responded with: {result}")

         self.multimodal_agent = CodeAgent(
             tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
             model= OpenAIServerModel(model_id="gpt-4o"),
             additional_authorized_imports=[
                 "requests",
                 "bs4",
                 "cv2",
                 "numpy",
                 "chess.engine",
                 "whisper",
                 "openpyxl"
+                "json",
             ],
+            name="multimodal_agent",
             description="""
+                 This is a powerful agent, it specializes in:
                     - Writing code to solve problem.
                     - Browse the web to find information.
+                    - Reason across audio, vision, and text, a.k.a multimodal agent. """,
             verbosity_level=0,
             max_steps=10,
         )
             ),
             tools=[get_file, audio_to_text],
             managed_agents=[
+                self.multimodal_agent],
             additional_authorized_imports=[
                 "requests",
                 "bs4",
                 "json",
             ],
             planning_interval=5,
+            max_steps=10,
         )
     def __call__(self, question: str, question_id: str, file_name: str) -> str:
             Answer the following question (question_id is {question_id}):):
                "{question}""{file}"
             Follow below rules when possible:
+               1. Please take the question literally! Do not add any additional information or assumptions.
+               2. Please answer as concisely as possible.
         """
         result = self.manager_agent.run(prompt)
         print(f"Agent responded with: {result}")