Final_Assignment_Template

Sleeping

App Files Files Community

huytofu92 committed on May 19, 2025

Commit

6aa97d2

1 Parent(s): ad0b84b

Add more authorized imports for master agent

Browse files

Files changed (3) hide show

load_data.py +0 -8
mini_agents.py +26 -10
prompts.yaml +2 -2

load_data.py DELETED Viewed

@@ -1,8 +0,0 @@
-import datasets
-def download_dataset(dataset_name: str, name: str):
-    dataset = datasets.load_dataset(dataset_name, name, trust_remote_code=True)
-    return dataset
-dataset = download_dataset("gaia-benchmark/GAIA", "2023_all")
-dataset.save_to_disk("GAIA_2023_all")

mini_agents.py CHANGED Viewed

@@ -31,6 +31,26 @@ PROMPT_TEMPLATE = {
     "pandas_agent": prompt_templates
 }
 audio_model = InferenceClientModel(
     model_id=MODEL_CHOICES["audio"][0],
     token=os.getenv("HUGGINGFACE_API_KEY")
@@ -40,10 +60,9 @@ audio_agent = CodeAgent(
     model=audio_model,
     tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
     max_steps=6,
-    # prompt_templates=PROMPT_TEMPLATE["audio_agent"],
-    additional_authorized_imports=["wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
     name="audio_agent",
-    description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it. It cannot process videos."
 )
 vlm_model = InferenceClientModel(
@@ -55,8 +74,7 @@ vlm_agent = CodeAgent(
     model=vlm_model,
     tools=[image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path],
     max_steps=6,
-    # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
-    additional_authorized_imports=["pytube", "pytube3", "youtube_dl", "cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
     name="vlm_agent",
     description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audios."
 )
@@ -70,8 +88,7 @@ arithmetic_agent = CodeAgent(
     model=arithmetic_model,
     tools=[operate_two_numbers, convert_number],
     max_steps=4,
-    # prompt_templates=PROMPT_TEMPLATE["arithmetic_agent"],
-    additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy'],
     name="arithmetic_agent",
     description="This agent is responsible for performing arithmetic operations on two numbers."
 )
@@ -85,8 +102,7 @@ pandas_agent = CodeAgent(
     model=pandas_model,
     tools=[load_dataframe_from_csv, load_dataframe_from_excel, to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby],
     max_steps=4,
-    # prompt_templates=PROMPT_TEMPLATE["pandas_agent"],
-    additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy'],
     name="pandas_agent",
     description="This agent is responsible for converting data to a dataframe, performing pandas operations on such dataframe and converting the dataframe back to a json or a csv file."
 )
@@ -127,7 +143,7 @@ master_agent = CodeAgent(
     tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
     add_base_tools=True,
     max_steps=20,
-    additional_authorized_imports=["math", "pandas", "json", "numpy", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'sklearn', 'scipy', 'datetime', 'typing', 'markdownify', 'requests', 'json'],
     verbosity_level=logging.INFO,
     planning_interval=4,
     prompt_templates=PROMPT_TEMPLATE["master_agent"],

     "pandas_agent": prompt_templates
 }
+# Consolidated authorized imports for all agents
+AUTHORIZED_IMPORTS = [
+    # Audio processing
+    "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
+    # Image/Video processing
+    "cv2", "pytesseract", "onnxruntime", "PIL", "bs4",
+    # Data processing
+    "numpy", "pandas", "sklearn", "scipy", "math",
+    # File handling
+    "base64", "io", "json", "os", "pickle",
+    # Visualization
+    "pyplot", "matplotlib",
+    # Machine learning
+    "hmmlearn",
+    # Web
+    "requests",
+    # Utilities
+    "logging", "yaml", "datetime", "typing", "markdownify"
+]
 audio_model = InferenceClientModel(
     model_id=MODEL_CHOICES["audio"][0],
     token=os.getenv("HUGGINGFACE_API_KEY")
     model=audio_model,
     tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
     max_steps=6,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
     name="audio_agent",
+    description="This agent is responsible for processing audio, transcribing audio and extracting text from it. It cannot process videos."
 )
 vlm_model = InferenceClientModel(
     model=vlm_model,
     tools=[image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path],
     max_steps=6,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
     name="vlm_agent",
     description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audios."
 )
     model=arithmetic_model,
     tools=[operate_two_numbers, convert_number],
     max_steps=4,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
     name="arithmetic_agent",
     description="This agent is responsible for performing arithmetic operations on two numbers."
 )
     model=pandas_model,
     tools=[load_dataframe_from_csv, load_dataframe_from_excel, to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby],
     max_steps=4,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
     name="pandas_agent",
     description="This agent is responsible for converting data to a dataframe, performing pandas operations on such dataframe and converting the dataframe back to a json or a csv file."
 )
     tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
     add_base_tools=True,
     max_steps=20,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
     verbosity_level=logging.INFO,
     planning_interval=4,
     prompt_templates=PROMPT_TEMPLATE["master_agent"],

prompts.yaml CHANGED Viewed

@@ -175,8 +175,8 @@ system_prompt: |-
   ```
   {%- if managed_agents and managed_agents.values() | list %}
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.
   Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
   Here is a list of the team members that you can call:
   ```python

   ```
   {%- if managed_agents and managed_agents.values() | list %}
+  You should also give tasks to team members whenever possible. They are very useful to solve complex tasks especially those pertaining to multiple modalities.
+  Calling a team member works the same as for calling a tool: you can simply pass the task description as a string to the member without specifying any argument.
   Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.
   Here is a list of the team members that you can call:
   ```python