Spaces:
Runtime error
Runtime error
| from smolagents import AzureOpenAIServerModel, CodeAgent, ToolCallingAgent, tool, Tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, InferenceClientModel | |
| import pandas as pd | |
| import os | |
| from requests.exceptions import HTTPError | |
| from dotenv import load_dotenv | |
| import requests | |
| from io import BytesIO | |
| from typing import IO | |
| from elevenlabs import ElevenLabs | |
# Load variables from a local .env file (if one exists) into the process
# environment before anything below reads os.environ.
load_dotenv()
from huggingface_hub import login, InferenceClient

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(None) falls back to an interactive token prompt, which cannot
# be answered when the script runs unattended.
_hf_token = os.environ.get("API_KEY_HUGGINGFACE")
if _hf_token:
    login(_hf_token)
# Azure OpenAI chat model that drives the agent. Every connection setting is
# read from an environment variable; a missing variable is passed as None.
model = AzureOpenAIServerModel(
    max_tokens=4096,
    api_version=os.getenv("OPENAI_API_VERSION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    model_id=os.getenv("AZURE_OPENAI_MODEL"),
)
@tool
def audio_transcription_tool(media_data: str) -> str:
    """Creates a transcript from an audio or video file.

    Args:
        media_data: Path to the audio or video file to transcribe. Anything that is not a path (for example an already-open binary file object) is forwarded to the API unchanged.

    Returns:
        str: The transcribed text, taken from the `text` attribute of the API response.
    """
    client = ElevenLabs(
        api_key=os.environ.get("ELEVENLABS_API_KEY"),
    )

    def _transcribe(audio_stream):
        # Single place that talks to the ElevenLabs speech-to-text endpoint.
        response = client.speech_to_text.convert(
            model_id="scribe_v1", file=audio_stream, tag_audio_events=False
        )
        return response.text

    # The agent hands over a file path, so open it in binary mode and make
    # sure the handle is closed again once the upload has finished.
    if isinstance(media_data, (str, os.PathLike)):
        with open(media_data, "rb") as audio_file:
            return _transcribe(audio_file)

    # Backward compatibility: callers that already pass file content
    # (file-like object, bytes, ...) keep the original pass-through behavior.
    return _transcribe(media_data)
| ## Alternative implementation using the Hugging Face Inference API (Whisper). It would probably work, but it is untested because I'm out of credits. | |
| # @tool | |
| # def audio_transcription_tool(audio_file: str) -> str: | |
| # """Creates a transcription of the voices detected in an audio file | |
| # Args: | |
| # audio_file (str): path to audio file (mp3, flac) | |
| # Returns: | |
| # str: Transcription text | |
| # """ | |
| # client = InferenceClient( | |
| # provider="hf-inference", | |
| # api_key=os.environ.get("API_KEY_HUGGINGFACE"), | |
| # ) | |
| # return client.automatic_speech_recognition(audio_file, model="openai/whisper-large-v3") | |
# Value handed to the agent as its planning_interval.
planning_steps = 1

# Code-writing agent equipped with web search, Wikipedia lookup, web page
# fetching and the audio transcription tool defined in this file.
agent = CodeAgent(
    model=model,
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        audio_transcription_tool,
    ],
    planning_interval=planning_steps,
    additional_authorized_imports=["pandas", "requests"],
)

# Local audio file the agent is asked to transcribe.
audio_location = "/home/rob/Audiobooks/Super Powereds Year 2/Super Powereds Year 2 Super Powereds, Book 2 (Unabridged) - 002.mp3"

# Task prompt for the agent.
query = f"Transcribe the mp3 file: {audio_location}"

# Execute the agent on the task.
result = agent.run(query)