# File size: 2,193 bytes
# c3b34bf
import asyncio
import logging
import os
import tempfile
from typing import Any, Dict, List, Optional, Union
import pandas as pd
import ast
import inspect
import openai
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.json import JSONReader
from llama_index.core.schema import Document
from llama_index.core.tools import FunctionTool
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize LlamaIndex-wide model defaults used by the query engines below.
Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Raw openai-python SDK client for the Whisper transcription endpoint.
# BUG FIX: bare `OpenAI` here previously resolved to the llama_index LLM
# wrapper imported above, which has no `.audio` attribute — the SDK client
# from the `openai` module is what transcribe_mp3_file needs.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
async def get_task_id(question: str) -> str:
    """Look up the task id for *question* by querying an in-memory index.

    Loads ``../questions.json``, builds a ``VectorStoreIndex`` over its
    documents, and asks a gpt-4o-mini query engine which task id matches
    the given question.

    Args:
        question: The question text; double quotes are stripped before use.

    Returns:
        The task id as free text from the query engine — callers should
        treat it as best-effort LLM output.
    """
    # NOTE(review): declared async but contains no awaits; kept async so
    # existing callers that await it keep working.
    logger.info(f"Getting task id for question: {question}")
    question = question.replace('"', '')
    reader = JSONReader(file_path="../questions.json")
    documents = reader.load_data()
    # Was a stray debug print(); route through the module logger instead.
    logger.info("Loaded %d documents from questions.json", len(documents))
    # NOTE(review): the index (and embeddings) is rebuilt on every call —
    # consider caching it if this function is invoked more than once.
    index = VectorStoreIndex.from_documents(documents=documents)
    query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))
    response = query_engine.query(f"What is the task id of the question: {question}")
    logger.info(f"Task id: {response.response}")
    return response.response
def transcribe_mp3_file(task_id: str) -> str:
    """Transcribe ``../data/<task_id>.mp3`` using OpenAI Whisper.

    Args:
        task_id: Task identifier; names the MP3 file on disk.

    Returns:
        The transcript text.

    Raises:
        FileNotFoundError: If ``../data/<task_id>.mp3`` does not exist.
    """
    logger.info(f"Transcribing MP3 file for task id: {task_id}")
    file_path = "../data/" + task_id + ".mp3"
    # Use a context manager so the handle is closed even if the API call
    # raises — the original leaked the open file object.
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript.text
def get_file_tools():
    """Build the list of file-processing FunctionTools exposed to the agent."""
    handlers = [transcribe_mp3_file]
    return [FunctionTool.from_defaults(handler) for handler in handlers]