| import asyncio |
| import logging |
| import os |
| import tempfile |
| from typing import Any, Dict, List, Optional, Union |
| import pandas as pd |
| import ast |
| import inspect |
|
|
| import openai |
| from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader |
| from llama_index.readers.json import JSONReader |
| from llama_index.core.schema import Document |
| from llama_index.core.tools import FunctionTool |
| from llama_index.embeddings.openai import OpenAIEmbedding |
| from llama_index.llms.openai import OpenAI |
|
|
| |
# Module-level logging: one logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global llama_index settings used by every index / query engine below.
Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# BUG FIX: the original bound llama_index's `OpenAI` LLM wrapper here, which has
# no `.audio` attribute. `transcribe_mp3_file` calls
# `client.audio.transcriptions.create(...)`, which is the official OpenAI SDK
# client API — so construct `openai.OpenAI` (the module imported above).
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
async def get_task_id(question: str) -> str:
    """Resolve the task id that corresponds to *question*.

    Builds an in-memory vector index over ``../questions.json`` and asks a
    gpt-4o-mini query engine which task id matches the question.

    NOTE(review): the index is rebuilt on every call — fine for one-off use,
    but worth caching if this is called repeatedly.

    Args:
        question: The question text; double quotes are stripped before lookup.

    Returns:
        The task id string extracted by the query engine.
    """
    # Lazy %-style args so formatting only happens if the level is enabled.
    logger.info("Getting task id for question: %s", question)
    question = question.replace('"', '')

    reader = JSONReader(file_path="../questions.json")
    documents = reader.load_data()
    # Was a stray debug print(); route it through the module logger instead.
    logger.info("Loaded %d documents from questions.json", len(documents))

    index = VectorStoreIndex.from_documents(documents=documents)
    query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))
    response = query_engine.query(f"What is the task id of the question: {question}")
    logger.info("Task id: %s", response.response)
    return response.response
|
|
def transcribe_mp3_file(task_id: str) -> str:
    """Transcribe the MP3 file ``../data/<task_id>.mp3`` using Whisper.

    Args:
        task_id: Identifier used to locate the audio file on disk.

    Returns:
        The transcribed text.

    Raises:
        FileNotFoundError: If no MP3 exists for *task_id*.
    """
    logger.info("Transcribing MP3 file for task id: %s", task_id)
    file_path = f"../data/{task_id}.mp3"

    # Use a context manager so the file handle is closed even if the API call
    # raises — the original opened the file and never closed it.
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    return transcript.text
|
|
def get_file_tools():
    """Assemble the file-processing FunctionTools exposed to the agent."""
    handlers = [
        transcribe_mp3_file,
    ]
    return [FunctionTool.from_defaults(handler) for handler in handlers]
|
|