import ast
import asyncio
import inspect
import logging
import os
import tempfile
from typing import Any, Dict, List, Optional, Union

import openai
import pandas as pd
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.schema import Document
from llama_index.core.tools import FunctionTool
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.readers.json import JSONReader

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default llama_index LLM and embedding model used by VectorStoreIndex below.
Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# BUG FIX: the original wrote `client = OpenAI(...)`, which instantiated the
# llama_index LLM wrapper (imported above), not the official OpenAI SDK client.
# That object has no `.audio` attribute, so the Whisper call in
# `transcribe_mp3_file` would fail at runtime. Use the SDK client explicitly.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


async def get_task_id(question: str) -> str:
    """Resolve the task id for *question* via a vector index over questions.json.

    Builds an in-memory ``VectorStoreIndex`` from ``../questions.json`` and asks
    a gpt-4o-mini query engine for the task id that matches the question.

    Args:
        question: The question text; double quotes are stripped before querying.

    Returns:
        The task id string produced by the query engine.
    """
    logger.info("Getting task id for question: %s", question)
    question = question.replace('"', '')
    # NOTE(review): current llama_index JSONReader takes the file in
    # `load_data(input_file=...)` rather than the constructor — confirm this
    # keyword works with the pinned version.
    reader = JSONReader(file_path="../questions.json")
    documents = reader.load_data()
    logger.info("Loaded %d documents from questions.json", len(documents))
    index = VectorStoreIndex.from_documents(documents=documents)
    query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))
    response = query_engine.query(
        f"What is the task id of the question: {question}"
    )
    logger.info("Task id: %s", response.response)
    return response.response


def transcribe_mp3_file(task_id: str) -> str:
    """Transcribe the MP3 file ``../data/<task_id>.mp3`` with Whisper.

    Args:
        task_id: Identifier used to locate the audio file on disk.

    Returns:
        The transcript text returned by the whisper-1 model.

    Raises:
        FileNotFoundError: If no MP3 exists for the given task id.
    """
    logger.info("Transcribing MP3 file for task id: %s", task_id)
    file_path = "../data/" + task_id + ".mp3"
    # Use a context manager so the file handle is closed even if the API
    # call raises (the original leaked the handle).
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript.text


def get_file_tools():
    """Return all available file processing tools for the agent."""
    return [
        FunctionTool.from_defaults(transcribe_mp3_file),
    ]