Spaces:
Sleeping
Sleeping
Agents' guides and prompts
Browse files
- mini_agents.py +6 -6
- prompts.yaml +3 -1
mini_agents.py
CHANGED
|
@@ -2,7 +2,7 @@ from smolagents import CodeAgent, InferenceClientModel
|
|
| 2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
| 3 |
from tools import tavily_search_tool, visit_webpage_tool
|
| 4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
| 5 |
-
from vlm_tools import
|
| 6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
| 7 |
from community_tools import community_tools, get_youtube_transcript_from_url
|
| 8 |
import os
|
|
@@ -41,9 +41,9 @@ audio_agent = CodeAgent(
|
|
| 41 |
tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
|
| 42 |
max_steps=6,
|
| 43 |
# prompt_templates=PROMPT_TEMPLATE["audio_agent"],
|
| 44 |
-
additional_authorized_imports=["pytube", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
|
| 45 |
name="audio_agent",
|
| 46 |
-
description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
|
| 47 |
)
|
| 48 |
|
| 49 |
vlm_model = InferenceClientModel(
|
|
@@ -53,12 +53,12 @@ vlm_model = InferenceClientModel(
|
|
| 53 |
|
| 54 |
vlm_agent = CodeAgent(
|
| 55 |
model=vlm_model,
|
| 56 |
-
tools=[
|
| 57 |
max_steps=6,
|
| 58 |
# prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
|
| 59 |
-
additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
|
| 60 |
name="vlm_agent",
|
| 61 |
-
description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
|
| 62 |
)
|
| 63 |
|
| 64 |
arithmetic_model = InferenceClientModel(
|
|
|
|
| 2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv
|
| 3 |
from tools import tavily_search_tool, visit_webpage_tool
|
| 4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
| 5 |
+
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
|
| 6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
| 7 |
from community_tools import community_tools, get_youtube_transcript_from_url
|
| 8 |
import os
|
|
|
|
| 41 |
tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
|
| 42 |
max_steps=6,
|
| 43 |
# prompt_templates=PROMPT_TEMPLATE["audio_agent"],
|
| 44 |
+
additional_authorized_imports=["pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
|
| 45 |
name="audio_agent",
|
| 46 |
+
description="This agent is responsible for processing audio, transcribing audio and extracting text from it. It cannot process videos."
|
| 47 |
)
|
| 48 |
|
| 49 |
vlm_model = InferenceClientModel(
|
|
|
|
| 53 |
|
| 54 |
vlm_agent = CodeAgent(
|
| 55 |
model=vlm_model,
|
| 56 |
+
tools=[image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path],
|
| 57 |
max_steps=6,
|
| 58 |
# prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
|
| 59 |
+
additional_authorized_imports=["pytube", "pytube3", "youtube_dl", "cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
|
| 60 |
name="vlm_agent",
|
| 61 |
+
description="This agent is responsible for downloading images or videos, processing images or videos, detecting objects in them and extracting text from them. It cannot process audio."
|
| 62 |
)
|
| 63 |
|
| 64 |
arithmetic_model = InferenceClientModel(
|
prompts.yaml
CHANGED
|
@@ -192,7 +192,8 @@ system_prompt: |-
|
|
| 192 |
7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
|
| 193 |
8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
|
| 194 |
9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
|
| 195 |
-
10.
|
|
|
|
| 196 |
|
| 197 |
Now Begin!
|
| 198 |
planning:
|
|
@@ -320,6 +321,7 @@ managed_agent:
|
|
| 320 |
{{task}}
|
| 321 |
---
|
| 322 |
You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
|
|
|
|
| 323 |
|
| 324 |
Your final_answer WILL HAVE to contain these parts:
|
| 325 |
### 1. Task outcome (short version):
|
|
|
|
| 192 |
7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.
|
| 193 |
8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
|
| 194 |
9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
|
| 195 |
+
10. Some questions might require you to use audio or video files. The file path is often mentioned at the end of the task description (for example: abcxyz.mp3). You must pass this file path to your managed agents so that they can use it as an argument to their tools.
|
| 196 |
+
11. Don't give up! You're in charge of solving the task, not providing directions to solve it.
|
| 197 |
|
| 198 |
Now Begin!
|
| 199 |
planning:
|
|
|
|
| 321 |
{{task}}
|
| 322 |
---
|
| 323 |
You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
|
| 324 |
+
Your manager may pass you a file path to an audio or video file. You must use this file path as an argument to your tools.
|
| 325 |
|
| 326 |
Your final_answer WILL HAVE to contain these parts:
|
| 327 |
### 1. Task outcome (short version):
|