|
|
import asyncio |
|
|
import logging |
|
|
import os |
|
|
import tempfile |
|
|
from typing import Any, Dict, List, Optional, Union |
|
|
import pandas as pd |
|
|
import ast |
|
|
import inspect |
|
|
|
|
|
import openai |
|
|
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader |
|
|
from llama_index.readers.json import JSONReader |
|
|
from llama_index.core.schema import Document |
|
|
from llama_index.core.tools import FunctionTool |
|
|
from llama_index.embeddings.openai import OpenAIEmbedding |
|
|
from llama_index.llms.openai import OpenAI |
|
|
|
|
|
|
|
|
# Configure root logging once at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Process-wide LlamaIndex defaults: the LLM and the embedding model.
Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Raw OpenAI SDK client used for audio transcription.
# NOTE: the bare name `OpenAI` in this module is the LlamaIndex LLM wrapper
# (imported from llama_index.llms.openai), which does not provide the
# `audio.transcriptions` API, so the SDK client is referenced through the
# `openai` package explicitly.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
async def get_task_id(question: str) -> str:
    """Look up the task id that belongs to a question.

    Loads ``../questions.json`` (resolved against the current working
    directory), builds an in-memory vector index over its contents and asks a
    ``gpt-4o-mini``-backed query engine which task id matches the question.

    Args:
        question: Question text. Double quotes are stripped before the lookup.

    Returns:
        The query engine's text answer, which is expected to contain the
        task id.
    """
    logger.info(f"Getting task id for question: {question}")
    question = question.replace('"', '')

    # JSONReader takes the file through load_data(input_file=...); its
    # constructor only accepts parsing options, not a path.
    documents = JSONReader().load_data(input_file="../questions.json")
    logger.debug("Loaded %d document(s) from questions.json", len(documents))

    index = VectorStoreIndex.from_documents(documents=documents)
    query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))

    # Await the async query API so the LLM round-trip does not block the
    # event loop this coroutine runs on.
    response = await query_engine.aquery(
        f"What is the task id of the question: {question}"
    )
    logger.info(f"Task id: {response.response}")
    return response.response
|
|
|
|
|
def transcribe_mp3_file(task_id: str) -> str:
    """Transcribe the MP3 file attached to a task.

    Args:
        task_id: Identifier of the task. The audio is read from
            ``../data/<task_id>.mp3``, resolved against the current working
            directory.

    Returns:
        The transcript text produced by the ``whisper-1`` model.

    Raises:
        FileNotFoundError: If no MP3 file exists for ``task_id``.
    """
    logger.info(f"Transcribing MP3 file for task id: {task_id}")
    file_path = f"../data/{task_id}.mp3"

    # The context manager closes the file handle even when the API call
    # raises; the original left the handle open.
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    return transcript.text
|
|
|
|
|
def get_file_tools():
    """Build the list of file-processing tools handed to the agent.

    Returns:
        A list with a single ``FunctionTool`` wrapping
        ``transcribe_mp3_file``.
    """
    mp3_transcriber = FunctionTool.from_defaults(transcribe_mp3_file)
    return [mp3_transcriber]
|
|
|