"""Kellogg course assistant: index course content into Pinecone and build a
conversational retrieval agent backed by OpenAI chat models."""

import os

import gradio as gr
import openai
import pinecone
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain.vectorstores import Pinecone


def read_files_from_folder(folder_path):
    """Return {filename: file text} for every .txt file directly inside folder_path.

    Non-.txt entries are ignored; subdirectories are not descended into.
    """
    file_data = {}
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):  # course pages are stored as plain text
            # Explicit encoding so behavior doesn't depend on the platform default.
            with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as f:
                file_data[filename] = f.read()
    return file_data


print("CHECK - Pinecone vector db setup")

# Set up OpenAI credentials and the embedding model used for indexing and querying.
openai.api_key = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()

# Initialize the Pinecone client and point at the course-assistant index.
index_name = "kellogg-course-assistant"
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV"),
)

# Read course pages from the "kellogg" folder into {filename: text}.
kellogg_data = read_files_from_folder("kellogg")

# Connect to the existing index and upsert the documents.
# BUG FIX: OpenAIEmbeddings has no .transform() method, and the LangChain
# Pinecone vectorstore has no .upsert(items=...) method — the original two-step
# "embed then upsert" code raised AttributeError. add_texts() embeds and
# upserts in a single call.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
vectorsearch.add_texts(
    texts=list(kellogg_data.values()),
    metadatas=[{"source": name} for name in kellogg_data],
    # Stable per-file ids so re-running the script overwrites instead of duplicating.
    ids=list(kellogg_data.keys()),
)

# Expose the populated index as a retriever for the agent's search tool.
retriever = vectorsearch.as_retriever()

print("CHECK - setting up conversational retrieval agent")

# Create the chat models; GPT-4 is the active model (swap `llm` to llm35 for
# the cheaper 16k-context model).
llm4 = ChatOpenAI(temperature=0.1, model_name="gpt-4")
llm35 = ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo-16k")
llm = llm4

# Retrieval tool the agent can call to search the indexed Kellogg content.
# BUG FIX: the original description string contained a raw line break, which
# is a syntax error in a plain Python string literal.
tool = create_retriever_tool(
    retriever,
    "search_kellogg_site",
    "Searches and returns content from within the Kellogg website.",
)
tools = [tool]

# Conversational retrieval agent components: memory, prompt template, agent,
# agent executor. memory_key is shared by the memory object and the prompt's
# history placeholder so they address the same buffer.
memory_key = "history"
memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

system_message = SystemMessage(
    content=(
        "You are a helpful educational expert providing advice to students of "
        "the Northwestern business school Kellogg. "
        "Use both your knowledge and the Kellogg site search tool to generate "
        "helpful answers for questions about courses and providing a list of "
        "suggested web course articles for more information. "
        # NOTE(review): the source file was truncated mid-sentence at this
        # point ("Format your answer with distinct..."); restore the remainder
        # of the original formatting instructions from version control.
        "Format your answer with distinct"
    )
)