AI_Agents_Final_Assignment_Template

Runtime error

App Files Files Community

Lumintroll committed on May 29, 2025

Commit

c3b6999

1 Parent(s): b50d9e7

Test agent working and tools plan created

Browse files

Files changed (10) hide show

.gitignore +6 -0
.python-version +1 -0
agent_tool_tester.py +76 -0
agent_tools.py +83 -24
app.py +3 -1
pyproject.toml +24 -0
scratchpad.py +133 -0
test.py +22 -0
tools.py +63 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+.env
+.venv
+agent_tool_tester.py
+app copy.py
+scratchpad.ipynb
+agent_tool_tester.py

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.10.0

agent_tool_tester.py ADDED Viewed

	@@ -0,0 +1,76 @@

+from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, Tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, InferenceClientModel
+import pandas as pd
+import os
+from requests.exceptions import HTTPError
+from dotenv import load_dotenv
+import requests
+from io import BytesIO
+from typing import IO
+from elevenlabs import ElevenLabs
+load_dotenv()
+from huggingface_hub import login, InferenceClient
+login(os.environ.get("API_KEY_HUGGINGFACE"))
+model = AzureOpenAIServerModel(
+    model_id = os.environ.get("AZURE_OPENAI_MODEL"),
+    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
+    api_version=os.environ.get("OPENAI_API_VERSION"),
+    max_tokens=4096
+)
+@tool
+def audio_transcription_tool(media_data: IO) -> str:
+    """Creates a transcript from an audio or video file using the ElevenLabs speech-to-text API.
+    Args:
+        media_data (IO): Binary file-like object holding the audio or video data.
+    Returns:
+        str: Transcribed text of the media.
+    """
+    # The API key is read from the ELEVENLABS_API_KEY environment variable.
+    client = ElevenLabs(
+        api_key=os.environ.get("ELEVENLABS_API_KEY"),
+    )
+    response = client.speech_to_text.convert(
+        model_id="scribe_v1", file=media_data, tag_audio_events=False
+    )
+    # Only the transcript text is returned; the rest of the API response is discarded.
+    return response.text
+## This probably would work, but I'm out of credits
+# @tool
+# def audio_transcription_tool(audio_file: str) -> str:
+#     """Creates a transcription of the voices detected in an audio file
+#     Args:
+#         audio_file (str): path to audio file (mp3, flac)
+#     Returns:
+#         str: Transcription text
+#     """
+#     client = InferenceClient(
+#         provider="hf-inference",
+#         api_key=os.environ.get("API_KEY_HUGGINGFACE"),
+#     )
+#     return client.automatic_speech_recognition(audio_file, model="openai/whisper-large-v3")
+# Value passed to CodeAgent as planning_interval below.
+planning_steps = 1
+agent = CodeAgent(model=model, tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), VisitWebpageTool(),
+                                      audio_transcription_tool], planning_interval=planning_steps, additional_authorized_imports=['pandas', 'requests'])
+# Local mp3 used as the manual test input for the transcription tool.
+# NOTE(review): the query hands the agent a file path, while audio_transcription_tool
+# is declared to take IO data — confirm the agent is expected to open the file itself.
+audio_location = '/home/rob/Audiobooks/Super Powereds Year 2/Super Powereds Year 2 Super Powereds, Book 2 (Unabridged) - 002.mp3'
+# Query
+query = f"""Transcribe the mp3 file: {audio_location}"""
+# Run it!
+result = agent.run(query)

agent_tools.py CHANGED Viewed

@@ -1,14 +1,91 @@
 #mp3 transcription
 #python code running
 #chess analysis
-from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool
-import os
-from dotenv import load_dotenv
-load_dotenv()
 model = AzureOpenAIServerModel(
     model_id = os.environ.get("AZURE_OPENAI_MODEL"),
@@ -18,24 +95,6 @@ model = AzureOpenAIServerModel(
     max_tokens=4096
 )
-@tool
-def intuition() -> dict:
-    """This tool provides suggestions (intuition) on which tools to use or approaches based on the task being attempted.
-    It returns a dict where the keys are tasks and values are intuition.
-    When you have a task step which includes one of the topics, get the dictionary value from this tool which contains information so you can make better decisions.
-    Intuition does not change during a run, do not rerun if the intuition is known.
-    Args:
-        None
-    """
-    suggestions = {'search':'A web search is most efficient for finding individual facts. Wikipedia is better for in-depth information on a topic.'}
-    return suggestions
-agent = CodeAgent(model=model, tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), intuition], planning_interval=1)
-# Query
-query = "How long would a cheetah at full speed take to run the length of Pont Alexandre III?"
-# Run it!
-result = agent.run(query + f"\nIf any intuition topics are relevant, look up the intuition before proceeding with the step. \nIntuition topics available: {str(list(intuition().keys()))}")

+from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, SpeechToTextTool
+import pandas as pd
+import os
+from requests.exceptions import HTTPError
+from dotenv import load_dotenv
+import requests
+from io import BytesIO
+from typing import IO
+#Excel reader
+@tool
+def get_remote_file(url: str) -> IO:
+    """This tool downloads a file using the requests package, which is often successful in downloading a file when other methods meet a HTTP Error 403: Forbidden.
+    It returns IO which can be used as if it were a file in other fuctions which expect file data. The URL must be for the file itself, not a page.
+    Args:
+        url (str): Web address of file to download.
+    Returns:
+        IO.
+    """
+    # Send a GET request to the URL
+    response = requests.get(url)
+    # Check if the request was successful
+    if response.status_code == 200:
+        # Use BytesIO to read the content of the response as a binary stream
+        return BytesIO(response.content)
+    else:
+        print(f"Failed to retrieve the file. Status code: {response.status_code}")
+@tool
+def excel_reader(file_data: IO) -> pd.DataFrame:
+    """
+    Loads an xlsx workbook into a pandas dataframe. The workbook can be a local file, a URL or bytes data.
+    Args:
+        file_data: A file location as a string (either a local file or url of xlsx file) or IO file data of an xlsx to read in as a dataframe. If a file is forbidden to be accessed directly, download the file data with get_remote_file as bytes and pass that instead of a URL.
+    Returns:
+        pd.DataFrame: The workbook contents as read by pandas with the openpyxl engine.
+    """
+    # openpyxl is named explicitly as the parsing engine.
+    frame = pd.read_excel(file_data, engine="openpyxl")
+    return frame
 #mp3 transcription
+@tool
+def audio_transcription_tool(media_data: IO) -> str:
+    """Creates a transcript from an audio or video file. The use of this tool consumes credits, so only use if SpeechToTextTool has not been successful.
+    Args:
+        media_data (IO): File data as bytes stream
+    Returns:
+        str: Transcribed text of the media.
+    """
+    # ElevenLabs is not imported at the top of this module, so import it here;
+    # without this the first call fails with a NameError.
+    from elevenlabs import ElevenLabs
+
+    client = ElevenLabs(
+        api_key=os.environ.get("ELEVENLABS_API_KEY"),
+    )
+    response = client.speech_to_text.convert(
+        model_id="scribe_v1", file=media_data, tag_audio_events=False
+    )
+    # Only the transcript text is returned; the rest of the API response is discarded.
+    return response.text
 #python code running
 #chess analysis
+#string reverse
+@tool
+def string_reverser(text: str) -> str:
+    """Reverses a string. This can be useful to try if initially a prompt or string seems unintelligible.
+    Args:
+        text (str): String that cannot be understood.
+    Returns:
+        str: The input with its characters in reverse order.
+    """
+    # Walk the characters back to front and glue them together again.
+    flipped = "".join(reversed(text))
+    return flipped
+custom_tools = [get_remote_file, excel_reader, audio_transcription_tool, string_reverser]
+default_tools = [DuckDuckGoSearchTool(), WikipediaSearchTool(), VisitWebpageTool(), SpeechToTextTool()]
+tools = custom_tools + default_tools
+additionals = ["pandas", "numpy", "datetime", "json", "re", "math"]
 model = AzureOpenAIServerModel(
     model_id = os.environ.get("AZURE_OPENAI_MODEL"),
     max_tokens=4096
 )
+planning_steps = 3
+agent = CodeAgent(model=model, tools=tools, additional_authorized_imports=additionals, planning_interval=planning_steps)

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
@@ -12,10 +13,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer

 import requests
 import inspect
 import pandas as pd
+from agent_tools import agent
 # (Keep Constants as is)
 # --- Constants ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
+    """Callable wrapper that answers a question with the agent built in agent_tools."""
     def __init__(self):
+        # `agent` is already a constructed CodeAgent instance. Calling it here
+        # without a task fails at start-up, so keep a reference to it instead.
+        self.agent = agent
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Hand the question to the agent and return whatever its run produces.
+        fixed_answer = self.agent.run(question)
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer

pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[project]
+name = "ai-agents-final-assignment-template"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10.0"
+dependencies = [
+    "dotenv>=0.9.9",
+    "duckduckgo-search>=8.0.2",
+    "google-genai>=1.16.1",
+    "gradio-client>=1.10.1",
+    "gradio>=5.31.0",
+    "markdownify>=1.1.0",
+    "mistralai>=1.7.1",
+    "openpyxl>=3.1.5",
+    "pandas>=2.2.3",
+    "requests>=2.32.3",
+    "smolagents[openai,transformers]>=1.16.1",
+    "wikipedia-api>=0.8.1",
+    "huggingface-hub>=0.32.0",
+    "elevenlabs>=2.1.0",
+    "jupyter>=1.1.1",
+    "transformers>=4.52.3",
+]

scratchpad.py ADDED Viewed

	@@ -0,0 +1,133 @@

+from dotenv import load_dotenv
+load_dotenv()
+use_mistral = False
+use_gemini = True
+import base64
+import requests
+import os
+from mistralai import Mistral
+def encode_image(image_path):
+    """Return the file at image_path as base64 text, or None if it cannot be read.
+
+    A message is printed for a missing file or any other error.
+    """
+    try:
+        with open(image_path, "rb") as handle:
+            raw_bytes = handle.read()
+        encoded = base64.b64encode(raw_bytes)
+        return encoded.decode('utf-8')
+    except FileNotFoundError:
+        print(f"Error: The file {image_path} was not found.")
+    except Exception as e:  # Any other failure is reported the same way
+        print(f"Error: {e}")
+    # Reached only after one of the handlers above has run.
+    return None
+# Path to your image
+image_path = "chess_test.jpg"
+# Getting the base64 string
+base64_image = encode_image(image_path)
+# Retrieve the API key from environment variables
+api_key = os.environ.get("API_KEY_MISTRAL")
+# Specify model
+model = "pixtral-large-latest"
+# Initialize the Mistral client
+client = Mistral(api_key=api_key)
+# Define the messages for the chat
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": r"""Below is an image of a chess board mid-game. Only use the image as a reference for the response. NEVER use implicit knowledge of chess or positions.
+                The bottom left square is A1, the top right square is H8.
+                Identify the position of all pieces in JSON format: {colour:{piece_type:[coordinates]}}
+                Chess board diagram:"""
+            },
+            {
+                "type": "image_url",
+                "image_url": f"data:image/jpeg;base64,{base64_image}"
+            }
+        ]
+    }
+]
+if use_mistral:
+    # Get the chat response
+    chat_response = client.chat.complete(
+        model=model,
+        messages=messages
+    )
+    # Print the content of the response
+    print(chat_response.choices[0].message.content)
+#### Gemini
+from google import genai
+from google.genai import types
+# Only run this block for Gemini Developer API
+# NOTE(review): this rebinds `client`, which was the Mistral client created earlier in the script.
+# NOTE(review): the key is read from API_KEY_GEMINI2 here, while generate() builds its own
+# client from GEMINI_API_KEY — confirm which environment variable is intended.
+client = genai.Client(api_key=os.environ.get("API_KEY_GEMINI2"))
+# Toggle between the flash and pro preview model names.
+flash = True
+if flash:
+    google_model = 'gemini-2.5-flash-preview-05-20'
+else:
+    google_model = 'gemini-2.5-pro-preview-05-06'
+chess_prompt = """Using this image of a chess board diagram. Black squares are coloured dark brown, white squares are light brown. A1 is at the bottom left of the image, H8 is at the top right. Complete the following tasks in order:
+Task 1: Count the number of occupied and unoccupied squares in each row. e.g. {'occupied':3, 'unoccupied':5} => STRING
+Task 2: Count the number of each piece type in each row. Check that they add up to the total number of pieces. => STRING
+Task 3: In JSON format note the position of every piece by colour, type and then list of coordinates => JSON
+Task 4: Convert JSON format to FEN string. {'board_fen': <FEN STRING>} => JSON"""
+# To run this code you need to install the following dependencies:
+# pip install google-genai
+import base64
+import os
+from google import genai
+from google.genai import types
+def generate():
+    """Stream a Gemini answer for the chess-board image prompt to stdout.
+
+    Reads the module-level ``base64_image``, ``google_model`` and ``chess_prompt``.
+
+    Raises:
+        ValueError: If no image data is available (``base64_image`` is None).
+    """
+    if base64_image is None:
+        # encode_image() returns None when the image file could not be read.
+        raise ValueError("No image data available: encode_image() returned None.")
+    client = genai.Client(
+        api_key=os.environ.get("GEMINI_API_KEY"),
+    )
+    model = google_model #"gemini-2.5-pro-preview-05-06"
+    contents = [
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_bytes(
+                    mime_type="image/jpeg",
+                    # from_bytes takes the raw image bytes, so undo the base64 text encoding first.
+                    data=base64.b64decode(base64_image),
+                ),
+                types.Part.from_text(text=chess_prompt),
+            ],
+        ),
+    ]
+    generate_content_config = types.GenerateContentConfig(
+        temperature=0.15,
+        response_mime_type="text/plain",
+    )
+    # Print each streamed chunk as it arrives, without adding line breaks.
+    for chunk in client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    ):
+        print(chunk.text, end="")
+if __name__ == "__main__":
+    generate()

test.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import requests
+import pandas as pd
+from io import BytesIO
+# Location of the Excel workbook to fetch
+url = r'https://datamillnorth.org/download/2nx5n/a3be8bde-b1d5-4da1-bdf9-258c994c6960/Copy%20of%20Q1%2023-24%20Data%20Mill.xlsx'
+# Download the workbook
+response = requests.get(url)
+if response.status_code != 200:
+    # Anything other than 200 is reported and nothing is parsed
+    print(f"Failed to retrieve the file. Status code: {response.status_code}")
+else:
+    # Wrap the payload in an in-memory binary stream so pandas can read it like a file
+    excel_file = BytesIO(response.content)
+    # Parse the workbook into a DataFrame and show it
+    df = pd.read_excel(excel_file)
+    print(df)

tools.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool
+import pandas as pd
+import os
+from requests.exceptions import HTTPError
+from dotenv import load_dotenv
+from tempfile import TemporaryFile
+import requests
+from io import BytesIO
+from typing import IO, Union
+load_dotenv()
+model = AzureOpenAIServerModel(
+    model_id = os.environ.get("AZURE_OPENAI_MODEL"),
+    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
+    api_version=os.environ.get("OPENAI_API_VERSION"),
+    max_tokens=4096
+)
+@tool
+def get_remote_file(url: str) -> IO:
+    """This tool downloads a file using the requests package, which is often successful in downloading a file when other methods meet a HTTP Error 403: Forbidden.
+    It returns IO which can be used as if it were a file in other functions which expect file data. The URL must be for the file itself, not a page.
+    Args:
+        url (str): Web address of file to download.
+    Returns:
+        IO: Bytes data
+    Raises:
+        HTTPError: If the server answers with any status code other than 200.
+    """
+    # A timeout keeps the agent from hanging indefinitely on an unresponsive server.
+    response = requests.get(url, timeout=30)
+    if response.status_code != 200:
+        # Raise instead of printing and implicitly returning None, so a failed
+        # download cannot be passed on to another tool as if it were file data.
+        raise HTTPError(
+            f"Failed to retrieve the file. Status code: {response.status_code}",
+            response=response,
+        )
+    # Wrap the payload in BytesIO so it can be read like an open binary file.
+    return BytesIO(response.content)
+@tool
+def excel_reader(file_data: IO) -> pd.DataFrame:
+    """
+    Loads an xlsx workbook into a pandas dataframe. The workbook can be a local file, a URL or bytes data.
+    Args:
+        file_data: A file location as a string (either a local file or url of xlsx file) or IO file data of an xlsx to read in as a dataframe. If a file is forbidden to be accessed directly, download the file data with get_remote_file as bytes and pass that instead of a URL.
+    Returns:
+        pd.DataFrame: The workbook contents as read by pandas with the openpyxl engine.
+    """
+    # openpyxl is named explicitly as the parsing engine.
+    frame = pd.read_excel(file_data, engine="openpyxl")
+    return frame
+@tool
+def string_reverser(text: str) -> str:
+    """Reverses a string. This can be useful to try if initially a prompt or string seems unintelligible.
+    Args:
+        text (str): String that cannot be understood.
+    Returns:
+        str: The input with its characters in reverse order.
+    """
+    # Walk the characters back to front and glue them together again.
+    flipped = "".join(reversed(text))
+    return flipped

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff