File size: 2,193 Bytes
c3b34bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import asyncio
import logging
import os
import tempfile
from typing import Any, Dict, List, Optional, Union
import pandas as pd
import ast
import inspect

import openai
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.json import JSONReader
from llama_index.core.schema import Document
from llama_index.core.tools import FunctionTool
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global LlamaIndex defaults: the chat model and the embedding model used
# whenever an index or query engine is built without explicit overrides.
Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Raw OpenAI SDK client used for the audio transcription endpoint.
# NOTE: the bare name `OpenAI` in this module is the LlamaIndex LLM wrapper
# (imported from llama_index.llms.openai), which does not expose
# `.audio.transcriptions`; the SDK client must come from the `openai` package.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def get_task_id(question: str) -> str:
    """Look up the task id that belongs to a question.

    Loads ``../questions.json``, builds an in-memory vector index over it and
    asks a ``gpt-4o-mini`` backed query engine for the task id of the given
    question.

    Args:
        question: The question text. Double quotes are removed before the
            text is embedded into the lookup prompt.

    Returns:
        The query engine's textual answer, expected to contain the task id.
    """
    logger.info("Getting task id for question: %s", question)
    question = question.replace('"', '')

    # JSONReader takes the path in load_data(), not in its constructor.
    documents = JSONReader().load_data(input_file="../questions.json")
    logger.debug("Loaded %d documents from questions.json", len(documents))

    # NOTE: the index is rebuilt (and re-embedded) on every call.
    index = VectorStoreIndex.from_documents(documents=documents)
    query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))

    # Use the async query API so the LLM round trip is awaited rather than
    # blocking the event loop from inside a coroutine.
    response = await query_engine.aquery(
        f"What is the task id of the question: {question}"
    )
    logger.info("Task id: %s", response.response)
    return response.response

def transcribe_mp3_file(task_id: str) -> str:
    """Transcribe the MP3 file of a task and return the transcript text.

    The audio is read from ``../data/<task_id>.mp3`` and sent to the
    ``whisper-1`` transcription model.

    Args:
        task_id: Identifier of the task; used as the MP3 file's base name.

    Returns:
        The transcribed text of the audio file.

    Raises:
        FileNotFoundError: If no MP3 file exists for ``task_id``.
    """
    logger.info("Transcribing MP3 file for task id: %s", task_id)
    file_path = "../data/" + task_id + ".mp3"
    # The context manager closes the file even if the API call raises.
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript.text

def get_file_tools():
    """Build the list of file-processing tools exposed to the agent.

    Each plain function is wrapped in a ``FunctionTool`` via
    ``FunctionTool.from_defaults``.
    """
    tool_functions = (transcribe_mp3_file,)
    file_tools = []
    for tool_function in tool_functions:
        file_tools.append(FunctionTool.from_defaults(tool_function))
    return file_tools