Lumintroll committed on
Commit
c3b6999
·
1 Parent(s): b50d9e7

Test agent working and tools plan created

Browse files
Files changed (10) hide show
  1. .gitignore +6 -0
  2. .python-version +1 -0
  3. agent_tool_tester.py +76 -0
  4. agent_tools.py +83 -24
  5. app.py +3 -1
  6. pyproject.toml +24 -0
  7. scratchpad.py +133 -0
  8. test.py +22 -0
  9. tools.py +63 -0
  10. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ .venv
3
+ agent_tool_tester.py
4
+ app copy.py
5
+ scratchpad.ipynb
6
+ agent_tool_tester.py
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10.0
agent_tool_tester.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, Tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, InferenceClientModel
2
+ import pandas as pd
3
+ import os
4
+ from requests.exceptions import HTTPError
5
+ from dotenv import load_dotenv
6
+ import requests
7
+ from io import BytesIO
8
+ from typing import IO
9
+ from elevenlabs import ElevenLabs
10
+
11
+ load_dotenv()
12
+
13
+ from huggingface_hub import login, InferenceClient
14
+ login(os.environ.get("API_KEY_HUGGINGFACE"))
15
+
16
+ model = AzureOpenAIServerModel(
17
+ model_id = os.environ.get("AZURE_OPENAI_MODEL"),
18
+ azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
19
+ api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
20
+ api_version=os.environ.get("OPENAI_API_VERSION"),
21
+ max_tokens=4096
22
+ )
23
+
24
+
25
+ @tool
26
+ def audio_transcription_tool(media_data: IO) -> dict:
27
+ """Creates a transcript from an audio or video file.
28
+
29
+ Args:
30
+ media_data (IO): File data
31
+
32
+ Returns:
33
+ dict: Response from the API of transcription and meta-data.
34
+ """
35
+ client = ElevenLabs(
36
+ api_key=os.environ.get("ELEVENLABS_API_KEY"),
37
+ )
38
+ # with open(media_data, 'rb') as af:
39
+ # response = client.speech_to_text.convert(
40
+ # model_id="scribe_v1", file=af, tag_audio_events=False
41
+ # )
42
+ response = client.speech_to_text.convert(
43
+ model_id="scribe_v1", file= media_data, tag_audio_events=False
44
+ )
45
+ return response.text
46
+
47
+ ## This probably would work, but I'm out of credits
48
+ # @tool
49
+ # def audio_transcription_tool(audio_file: str) -> str:
50
+ # """Creates a transcription of the voices detected in an audio file
51
+
52
+ # Args:
53
+ # audio_file (str): path to audio file (mp3, flac)
54
+
55
+ # Returns:
56
+ # str: Transcription text
57
+ # """
58
+ # client = InferenceClient(
59
+ # provider="hf-inference",
60
+ # api_key=os.environ.get("API_KEY_HUGGINGFACE"),
61
+ # )
62
+ # return client.automatic_speech_recognition(audio_file, model="openai/whisper-large-v3")
63
+
64
+ planning_steps = 1
65
+
66
+ agent = CodeAgent(model=model, tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), VisitWebpageTool(),
67
+ audio_transcription_tool], planning_interval=planning_steps, additional_authorized_imports=['pandas', 'requests'])
68
+
69
+ audio_location = '/home/rob/Audiobooks/Super Powereds Year 2/Super Powereds Year 2 Super Powereds, Book 2 (Unabridged) - 002.mp3'
70
+
71
+ # Query
72
+ query = f"""Transcribe the mp3 file: {audio_location}"""
73
+ # Run it!
74
+ result = agent.run(query)
75
+
76
+
agent_tools.py CHANGED
@@ -1,14 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #mp3 transcription
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  #python code running
4
 
5
  #chess analysis
6
 
7
- from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool
8
- import os
9
- from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- load_dotenv()
12
 
13
  model = AzureOpenAIServerModel(
14
  model_id = os.environ.get("AZURE_OPENAI_MODEL"),
@@ -18,24 +95,6 @@ model = AzureOpenAIServerModel(
18
  max_tokens=4096
19
  )
20
 
21
- @tool
22
- def intuition() -> dict:
23
- """This tool provides suggestions (intuition) on which tools to use or approaches based on the task being attempted.
24
- It returns a dict where the keys are tasks and values are intuition.
25
- When you have a task step which includes one of the topics, get the dictionary value from this tool which contains information so you can make better decisions.
26
- Intuition does not change during a run, do not rerun if the intuition is known.
27
- Args:
28
- None
29
- """
30
- suggestions = {'search':'A web search is most efficient for finding individual facts. Wikipedia is better for in-depth information on a topic.'}
31
- return suggestions
32
-
33
- agent = CodeAgent(model=model, tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), intuition], planning_interval=1)
34
-
35
-
36
- # Query
37
- query = "How long would a cheetah at full speed take to run the length of Pont Alexandre III?"
38
- # Run it!
39
- result = agent.run(query + f"\nIf any intuition topics are relevant, look up the intuition before proceeding with the step. \nIntuition topics available: {str(list(intuition().keys()))}")
40
-
41
 
 
 
1
+
2
+ from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, SpeechToTextTool
3
+ import pandas as pd
4
+ import os
5
+ from requests.exceptions import HTTPError
6
+ from dotenv import load_dotenv
7
+ import requests
8
+ from io import BytesIO
9
+ from typing import IO
10
+
11
+ #Excel reader
12
+ @tool
13
+ def get_remote_file(url: str) -> IO:
14
+ """This tool downloads a file using the requests package, which is often successful in downloading a file when other methods meet a HTTP Error 403: Forbidden.
15
+ It returns IO which can be used as if it were a file in other fuctions which expect file data. The URL must be for the file itself, not a page.
16
+
17
+ Args:
18
+ url (str): Web address of file to download.
19
+
20
+ Returns:
21
+ IO.
22
+ """
23
+ # Send a GET request to the URL
24
+ response = requests.get(url)
25
+
26
+ # Check if the request was successful
27
+ if response.status_code == 200:
28
+ # Use BytesIO to read the content of the response as a binary stream
29
+ return BytesIO(response.content)
30
+
31
+ else:
32
+ print(f"Failed to retrieve the file. Status code: {response.status_code}")
33
+
34
+ @tool
35
+ def excel_reader(file_data: IO) -> pd.DataFrame:
36
+ """
37
+ This tool returns a pandas dataframe from a file locally, from a URL or from bytes data.
38
+ Args:
39
+ file_data: A file location as a string (either a local file or url of xlsx file) or IO file data of an xlsx to read in as a dataframe.
40
+ If a file is forbidden to be accessed directly, another approach is to download the file data with get_remote_file as bytes and use that instead of a URL.
41
+ """
42
+ return pd.read_excel(file_data, engine="openpyxl")
43
+
44
  #mp3 transcription
45
+ @tool
46
+ def audio_transcription_tool(media_data: IO) -> dict:
47
+ """Creates a transcript from an audio or video file. The use of this tool consumes credits, so only use if SpeechToTextTool has not been successful.
48
+
49
+ Args:
50
+ media_data (IO): File data as bytes stream
51
+
52
+ Returns:
53
+ dict: Response from the API of transcription and meta-data.
54
+ """
55
+ client = ElevenLabs(
56
+ api_key=os.environ.get("ELEVENLABS_API_KEY"),
57
+ )
58
+ # with open(media_data, 'rb') as af:
59
+ # response = client.speech_to_text.convert(
60
+ # model_id="scribe_v1", file=af, tag_audio_events=False
61
+ # )
62
+ response = client.speech_to_text.convert(
63
+ model_id="scribe_v1", file= media_data, tag_audio_events=False
64
+ )
65
+ return response.text
66
 
67
  #python code running
68
 
69
  #chess analysis
70
 
71
+ #string reverse
72
+ @tool
73
+ def string_reverser(text: str) -> str:
74
+ """Reverses a string. This can be useful to try if initially a prompt or string seems uninelligable.
75
+
76
+ Args:
77
+ text (str): String that cannot be understood.
78
+
79
+ Returns:
80
+ str: Reversed string
81
+ """
82
+ return text[::-1]
83
+
84
+ custom_tools = [get_remote_file, excel_reader, audio_transcription_tool, string_reverser]
85
+ default_tools = [DuckDuckGoSearchTool(), WikipediaSearchTool(), VisitWebpageTool(), SpeechToTextTool()]
86
+ tools = custom_tools + default_tools
87
 
88
+ additionals = ["pandas", "numpy", "datetime", "json", "re", "math"]
89
 
90
  model = AzureOpenAIServerModel(
91
  model_id = os.environ.get("AZURE_OPENAI_MODEL"),
 
95
  max_tokens=4096
96
  )
97
 
98
+ planning_steps = 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
+ agent = CodeAgent(model=model, tools=tools, additional_authorized_imports=additionals, planning_interval=planning_steps)
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -12,10 +13,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
 
15
  print("BasicAgent initialized.")
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent_tools import agent
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
13
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
14
  class BasicAgent:
15
  def __init__(self):
16
+ self.agent= agent()
17
  print("BasicAgent initialized.")
18
  def __call__(self, question: str) -> str:
19
  print(f"Agent received question (first 50 chars): {question[:50]}...")
20
+ fixed_answer = self.agent.run(question)
21
  print(f"Agent returning fixed answer: {fixed_answer}")
22
  return fixed_answer
23
 
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "ai-agents-final-assignment-template"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10.0"
7
+ dependencies = [
8
+ "dotenv>=0.9.9",
9
+ "duckduckgo-search>=8.0.2",
10
+ "google-genai>=1.16.1",
11
+ "gradio-client>=1.10.1",
12
+ "gradio>=5.31.0",
13
+ "markdownify>=1.1.0",
14
+ "mistralai>=1.7.1",
15
+ "openpyxl>=3.1.5",
16
+ "pandas>=2.2.3",
17
+ "requests>=2.32.3",
18
+ "smolagents[openai,transformers]>=1.16.1",
19
+ "wikipedia-api>=0.8.1",
20
+ "huggingface-hub>=0.32.0",
21
+ "elevenlabs>=2.1.0",
22
+ "jupyter>=1.1.1",
23
+ "transformers>=4.52.3",
24
+ ]
scratchpad.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
+
5
+ use_mistral = False
6
+ use_gemini = True
7
+
8
+ import base64
9
+ import requests
10
+ import os
11
+ from mistralai import Mistral
12
+
13
+ def encode_image(image_path):
14
+ """Encode the image to base64."""
15
+ try:
16
+ with open(image_path, "rb") as image_file:
17
+ return base64.b64encode(image_file.read()).decode('utf-8')
18
+ except FileNotFoundError:
19
+ print(f"Error: The file {image_path} was not found.")
20
+ return None
21
+ except Exception as e: # Added general exception handling
22
+ print(f"Error: {e}")
23
+ return None
24
+
25
+ # Path to your image
26
+ image_path = "chess_test.jpg"
27
+
28
+ # Getting the base64 string
29
+ base64_image = encode_image(image_path)
30
+
31
+ # Retrieve the API key from environment variables
32
+ api_key = os.environ.get("API_KEY_MISTRAL")
33
+
34
+ # Specify model
35
+ model = "pixtral-large-latest"
36
+
37
+ # Initialize the Mistral client
38
+ client = Mistral(api_key=api_key)
39
+
40
+ # Define the messages for the chat
41
+ messages = [
42
+ {
43
+ "role": "user",
44
+ "content": [
45
+ {
46
+ "type": "text",
47
+ "text": r"""Below is an image of a chess board mid-game. Only use the image as a reference for the response. NEVER use implicit knowledge of chess or positions.
48
+ The bottom left square is A1, the top right square is H8.
49
+
50
+ Identify the position of all pieces in JSON format: {colour:{piece_type:[coordinates]}}
51
+
52
+ Chess board diagram:"""
53
+ },
54
+ {
55
+ "type": "image_url",
56
+ "image_url": f"data:image/jpeg;base64,{base64_image}"
57
+ }
58
+ ]
59
+ }
60
+ ]
61
+
62
+ if use_mistral:
63
+ # Get the chat response
64
+ chat_response = client.chat.complete(
65
+ model=model,
66
+ messages=messages
67
+ )
68
+
69
+ # Print the content of the response
70
+ print(chat_response.choices[0].message.content)
71
+
72
+ #### Gemini
73
+
74
+ from google import genai
75
+ from google.genai import types
76
+
77
+ # Only run this block for Gemini Developer API
78
+
79
+ client = genai.Client(api_key=os.environ.get("API_KEY_GEMINI2"))
80
+ flash = True
81
+
82
+ if flash:
83
+ google_model = 'gemini-2.5-flash-preview-05-20'
84
+ else:
85
+ google_model = 'gemini-2.5-pro-preview-05-06'
86
+
87
+ chess_prompt = """Using this image of a chess board diagram. Black squares are coloured dark brown, white squares are light brown. A1 is at the bottom left of the image, H8 is at the top right. Complete the following tasks in order:
88
+ Task 1: Count the number of occupied and unoccupied squares in each row. e.g. {'occupied':3, 'unoccupied':5} => STRING
89
+ Task 2: Count the number of each piece type in each row. Check that they add up to the total number of pieces. => STRING
90
+ Task 3: In JSON format note the position of every piece by colour, type and then list of coordinates => JSON
91
+ Task 4: Convert JSON format to FEN string. {'board_fen': <FEN STRING>} => JSON"""
92
+
93
+
94
+ # To run this code you need to install the following dependencies:
95
+ # pip install google-genai
96
+
97
+ import base64
98
+ import os
99
+ from google import genai
100
+ from google.genai import types
101
+
102
+ def generate():
103
+ client = genai.Client(
104
+ api_key=os.environ.get("GEMINI_API_KEY"),
105
+ )
106
+
107
+ model = google_model #"gemini-2.5-pro-preview-05-06"
108
+ contents = [
109
+ types.Content(
110
+ role="user",
111
+ parts=[
112
+ types.Part.from_bytes(
113
+ mime_type="image/jpeg",
114
+ data=base64_image,
115
+ ),
116
+ types.Part.from_text(text=chess_prompt),
117
+ ],
118
+ ),
119
+ ]
120
+ generate_content_config = types.GenerateContentConfig(
121
+ temperature=0.15,
122
+ response_mime_type="text/plain",
123
+ )
124
+
125
+ for chunk in client.models.generate_content_stream(
126
+ model=model,
127
+ contents=contents,
128
+ config=generate_content_config,
129
+ ):
130
+ print(chunk.text, end="")
131
+
132
+ if __name__ == "__main__":
133
+ generate()
test.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ from io import BytesIO
4
+
5
+ # URL of the Excel file
6
+ url = r'https://datamillnorth.org/download/2nx5n/a3be8bde-b1d5-4da1-bdf9-258c994c6960/Copy%20of%20Q1%2023-24%20Data%20Mill.xlsx'
7
+
8
+ # Send a GET request to the URL
9
+ response = requests.get(url)
10
+
11
+ # Check if the request was successful
12
+ if response.status_code == 200:
13
+ # Use BytesIO to read the content of the response as a binary stream
14
+ excel_file = BytesIO(response.content)
15
+
16
+ # Read the Excel file into a DataFrame
17
+ df = pd.read_excel(excel_file)
18
+
19
+ # Display the DataFrame
20
+ print(df)
21
+ else:
22
+ print(f"Failed to retrieve the file. Status code: {response.status_code}")
tools.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool
2
+ import pandas as pd
3
+ import os
4
+ from requests.exceptions import HTTPError
5
+ from dotenv import load_dotenv
6
+ from tempfile import TemporaryFile
7
+ import requests
8
+ from io import BytesIO
9
+ from typing import IO, Union
10
+ load_dotenv()
11
+
12
+ model = AzureOpenAIServerModel(
13
+ model_id = os.environ.get("AZURE_OPENAI_MODEL"),
14
+ azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
15
+ api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
16
+ api_version=os.environ.get("OPENAI_API_VERSION"),
17
+ max_tokens=4096
18
+ )
19
+
20
+ @tool
21
+ def get_remote_file(url: str) -> IO:
22
+ """This tool downloads a file using the requests package, which is often successful in downloading a file when other methods meet a HTTP Error 403: Forbidden.
23
+ It returns IO which can be used as if it were a file in other fuctions which expect file data. The URL must be for the file itself, not a page.
24
+
25
+ Args:
26
+ url (str): Web address of file to download.
27
+
28
+ Returns:
29
+ IO: Bytes data
30
+ """
31
+ # Send a GET request to the URL
32
+ response = requests.get(url)
33
+
34
+ # Check if the request was successful
35
+ if response.status_code == 200:
36
+ # Use BytesIO to read the content of the response as a binary stream
37
+ return BytesIO(response.content)
38
+
39
+ else:
40
+ print(f"Failed to retrieve the file. Status code: {response.status_code}")
41
+
42
+ @tool
43
+ def excel_reader(file_data: IO) -> pd.DataFrame:
44
+ """
45
+ This tool returns a pandas dataframe from a file locally, from a URL or from bytes data.
46
+ Args:
47
+ file_data: A file location as a string (either a local file or url of xlsx file) or IO file data of an xlsx to read in as a dataframe.
48
+ If a file is forbidden to be accessed directly, another approach is to download the file data with get_remote_file as bytes and use that instead of a URL.
49
+ """
50
+ return pd.read_excel(file_data, engine="openpyxl")
51
+
52
+ @tool
53
+ def string_reverser(text: str) -> str:
54
+ """Reverses a string. This can be useful to try if initially a prompt or string seems uninelligable.
55
+
56
+ Args:
57
+ text (str): String that cannot be understood.
58
+
59
+ Returns:
60
+ str: Reversed string
61
+ """
62
+ return text[::-1]
63
+
uv.lock ADDED
The diff for this file is too large to render. See raw diff