AI_Agents_Final_Assignment_Template / agent_tool_tester.py
Lumintroll's picture
Test agent working and tools plan created
c3b6999
from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, Tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, InferenceClientModel
import pandas as pd
import os
from requests.exceptions import HTTPError
from dotenv import load_dotenv
import requests
from io import BytesIO
from typing import IO
from elevenlabs import ElevenLabs
# Load key/value pairs from a local .env file into the process environment
# before any of the os.environ lookups below run.
load_dotenv()
from huggingface_hub import login, InferenceClient
# Log in to the Hugging Face Hub with the token stored in API_KEY_HUGGINGFACE.
# NOTE(review): os.environ.get returns None when the variable is unset --
# confirm how login() should behave in that case.
login(os.environ.get("API_KEY_HUGGINGFACE"))
# Language model used by the agent, served through Azure OpenAI.
# Every connection setting is read from the environment; only the
# completion budget (max_tokens) is fixed here.
model = AzureOpenAIServerModel(
    model_id=os.getenv("AZURE_OPENAI_MODEL"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("OPENAI_API_VERSION"),
    max_tokens=4096,
)
@tool
def audio_transcription_tool(media_data: IO) -> str:
    """Creates a transcript from an audio or video file.

    Args:
        media_data (IO): Binary file object holding the audio/video data, or a filesystem path to such a file.

    Returns:
        str: The transcribed text returned by the speech-to-text API.
    """
    client = ElevenLabs(
        api_key=os.environ.get("ELEVENLABS_API_KEY"),
    )

    # A path is opened here in binary mode so callers that only have a file
    # location (e.g. an agent given a path in its prompt) can use the tool.
    # A file object supplied by the caller is used as-is and is NOT closed,
    # because the caller owns it.
    opened_here = isinstance(media_data, (str, os.PathLike))
    media_file = open(media_data, "rb") if opened_here else media_data
    try:
        response = client.speech_to_text.convert(
            model_id="scribe_v1", file=media_file, tag_audio_events=False
        )
    finally:
        if opened_here:
            media_file.close()

    # Only the transcript text is returned; the rest of the API response
    # (metadata) is dropped, which is why the return type is str.
    return response.text
## Alternative tool using the Hugging Face InferenceClient (Whisper). Untested: it would probably work, but I'm out of credits.
# @tool
# def audio_transcription_tool(audio_file: str) -> str:
# """Creates a transcription of the voices detected in an audio file
# Args:
# audio_file (str): path to audio file (mp3, flac)
# Returns:
# str: Transcription text
# """
# client = InferenceClient(
# provider="hf-inference",
# api_key=os.environ.get("API_KEY_HUGGINGFACE"),
# )
# return client.automatic_speech_recognition(audio_file, model="openai/whisper-large-v3")
# Local audio file used for this manual test run.
audio_location = '/home/rob/Audiobooks/Super Powereds Year 2/Super Powereds Year 2 Super Powereds, Book 2 (Unabridged) - 002.mp3'

# Task prompt handed to the agent.
query = f"Transcribe the mp3 file: {audio_location}"

# Value passed to the agent as its planning interval.
planning_steps = 1

# Code-writing agent with web search, Wikipedia, page fetching and the
# transcription tool; generated code may additionally import pandas/requests.
agent = CodeAgent(
    model=model,
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        audio_transcription_tool,
    ],
    planning_interval=planning_steps,
    additional_authorized_imports=["pandas", "requests"],
)

# Run the agent on the prompt and keep whatever it returns.
result = agent.run(query)