|
|
from smolagents import CodeAgent, LiteLLMModel |
|
|
from smolagents.default_tools import (DuckDuckGoSearchTool, |
|
|
VisitWebpageTool, |
|
|
WikipediaSearchTool, |
|
|
SpeechToTextTool, |
|
|
PythonInterpreterTool) |
|
|
import yaml |
|
|
from final_answer import FinalAnswerTool |
|
|
from tools import (youtube_frames_to_images, use_vision_model, |
|
|
read_file, download_file_from_url, |
|
|
extract_text_from_image, analyze_csv_file, |
|
|
analyze_excel_file, youtube_transcribe, |
|
|
transcribe_audio, review_youtube_video) |
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
# Populate the process environment from a .env file (python-dotenv) before
# anything below reads settings through os.getenv.
load_dotenv()

# Prompt templates that are handed to the manager agent.  The path is relative,
# so it is resolved against the current working directory.  The encoding is
# pinned to UTF-8 so that parsing does not depend on the platform's default
# locale encoding (which is not UTF-8 on every system).
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
|
|
|
|
|
|
|
|
# LLM backend used by the agent, reached through LiteLLM.
# LiteLLM selects the provider from the first segment of model_id: "gemini/"
# targets Google AI Studio, which is the service the GEMINI_KEY credential
# belongs to.  A leading "huggingface/" segment would send the request to the
# Hugging Face provider instead, where a Gemini key is not valid.
react_model = LiteLLMModel(
    model_id="gemini/gemini-1.5-flash",
    api_key=os.getenv("GEMINI_KEY"),  # None when the variable is not set
    temperature=0.2,
)
|
|
|
|
|
# Single top-level agent.  It has no managed sub-agents; all capabilities are
# exposed to it directly as tools.
manager_agent = CodeAgent(
    name="Manager",
    description="The manager of the team, responsible for overseeing and guiding the team's work.",
    model=react_model,
    prompt_templates=prompt_templates,
    managed_agents=[],
    tools=[
        FinalAnswerTool(),
        # Tools shipped with smolagents.
        DuckDuckGoSearchTool(),
        VisitWebpageTool(max_output_length=500000),
        WikipediaSearchTool(extract_format="HTML"),
        SpeechToTextTool(),
        # Tools imported from the project's own `tools` module.
        youtube_frames_to_images,
        youtube_transcribe,
        use_vision_model,
        read_file,
        download_file_from_url,
        extract_text_from_image,
        analyze_csv_file,
        analyze_excel_file,
        transcribe_audio,
        review_youtube_video,
    ],
    # Modules that agent-generated code is permitted to import, in addition to
    # whatever CodeAgent authorizes by default.
    additional_authorized_imports=[
        "os",
        "pandas",
        "numpy",
        "PIL",
        "tempfile",
        "PIL.Image",
    ],
    max_steps=20,
    planning_interval=6,
    verbosity_level=1,
)
|
|
|
|
|
# Script entry point: serve the manager agent through the Gradio web UI.
if __name__ == "__main__":
    # Imported here so that merely importing this module does not pull in the
    # UI layer.
    from smolagents import GradioUI

    ui = GradioUI(manager_agent)
    ui.launch()