"""Run a smolagents CodeAgent that transcribes a local audio file via ElevenLabs."""

import os
from io import BytesIO
from typing import IO

import pandas as pd
import requests
from dotenv import load_dotenv
from elevenlabs import ElevenLabs
from requests.exceptions import HTTPError
from smolagents import (
    AzureOpenAIServerModel,
    CodeAgent,
    ToolCallingAgent,
    tool,
    Tool,
    load_tool,
    DuckDuckGoSearchTool,
    WikipediaSearchTool,
    VisitWebpageTool,
    InferenceClientModel,
)

# Populate os.environ from a local .env file before any credentials are read.
load_dotenv()

# NOTE(review): kept after load_dotenv() to preserve the original execution
# order; confirm whether it can safely move up with the other imports.
from huggingface_hub import login, InferenceClient  # noqa: E402

login(os.environ.get("API_KEY_HUGGINGFACE"))

# Azure-hosted OpenAI model that drives the agent; all settings come from the
# environment.
model = AzureOpenAIServerModel(
    model_id=os.environ.get("AZURE_OPENAI_MODEL"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version=os.environ.get("OPENAI_API_VERSION"),
    max_tokens=4096,
)


@tool
def audio_transcription_tool(media_data: str) -> str:
    """Creates a transcript from an audio or video file.

    Args:
        media_data (str): Path to the audio or video file on the local filesystem.

    Returns:
        str: The transcribed text.
    """
    client = ElevenLabs(
        api_key=os.environ.get("ELEVENLABS_API_KEY"),
    )

    if isinstance(media_data, (str, os.PathLike)):
        # The query hands the agent a filesystem path, so open the file here
        # and let the context manager close the handle once the upload is done.
        with open(media_data, "rb") as media_file:
            response = client.speech_to_text.convert(
                model_id="scribe_v1", file=media_file, tag_audio_events=False
            )
    else:
        # Backward compatibility: an already-open binary stream is forwarded
        # unchanged, exactly as the previous implementation did.
        response = client.speech_to_text.convert(
            model_id="scribe_v1", file=media_data, tag_audio_events=False
        )

    # Only the transcript text is returned, not the full API response object.
    return response.text


## This probably would work, but I'm out of credits
# @tool
# def audio_transcription_tool(audio_file: str) -> str:
#     """Creates a transcription of the voices detected in an audio file
#
#     Args:
#         audio_file (str): path to audio file (mp3, flac)
#
#     Returns:
#         str: Transcription text
#     """
#     client = InferenceClient(
#         provider="hf-inference",
#         api_key=os.environ.get("API_KEY_HUGGINGFACE"),
#     )
#     return client.automatic_speech_recognition(audio_file, model="openai/whisper-large-v3")

# Passed to the agent as its planning interval.
planning_steps = 1

agent = CodeAgent(
    model=model,
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        audio_transcription_tool,
    ],
    planning_interval=planning_steps,
    additional_authorized_imports=['pandas', 'requests'],
)

audio_location = '/home/rob/Audiobooks/Super Powereds Year 2/Super Powereds Year 2 Super Powereds, Book 2 (Unabridged) - 002.mp3'

# Query
query = f"""Transcribe the mp3 file: {audio_location}"""

# Run it!
result = agent.run(query)