"""Gradio chat UI for a Kellogg course-assistant RAG agent.

Pipeline: Pinecone vector index -> LangChain retriever tool ->
OpenAI-functions agent -> streaming Gradio ChatInterface.
"""

import os
from queue import Empty, Queue
from threading import Thread
from typing import Any

import gradio as gr
import openai
import pinecone
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain.vectorstores import Pinecone

print("CHECK - Pinecone vector db setup")

# OpenAI credentials and the embedding model used to query the index.
openai.api_key = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()

# Connect to the existing Pinecone index holding the Kellogg site content.
index_name = "kellogg-course-assistant"
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

# Load the existing index and expose it as a LangChain retriever.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
retriever = vectorsearch.as_retriever()

print("CHECK - setting up conversational retrieval agent")


class QueueCallback(BaseCallbackHandler):
    """Callback handler for streaming LLM responses to a queue."""

    def __init__(self, q: Queue) -> None:
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # Hand each streamed token to the consumer via the queue.
        self.q.put(token)

    def on_llm_end(self, *args: Any, **kwargs: Any) -> None:
        # NOTE(review): returns a bool despite the None annotation; the
        # return value is ignored by LangChain, so this is harmless.
        return self.q.empty()


# Retrieval tool the agent can call to search the indexed Kellogg site.
tool = create_retriever_tool(
    retriever,
    "search_kellogg_site",
    "Searches and returns content from within the Kellogg website.",
)
tools = [tool]

system_message = SystemMessage(
    content=(
        "You are a helpful educational expert providing advice to students of the Northwestern business school Kellogg. "
        "Use both your knowledge and the Kellogg site search tool to generate helpful answers for questions about courses and create a list of suggested web course articles for more information. "
        "Do not include details of your intermediate steps in the final response. "
        "At the end of your response, provide links to relevant web course articles returned by the retriever."
    )
)

print("CHECK - setting up gradio chatbot UI")

# Model selector shown in the chat UI. type="index" means the callback
# receives the selected option's position: 0 = gpt-4, 1 = gpt-3.5-turbo.
model_type = gr.Dropdown(
    choices=["gpt-4 + rag", "gpt-3.5-turbo + rag"],
    value="gpt-4 + rag",
    type="index",
    label="LLM Models",
)


def predict(message, model_type):
    """Run the retrieval agent on ``message``, streaming tokens as they arrive.

    Args:
        message: The user's question.
        model_type: Dropdown index; 1 selects gpt-3.5-turbo-16k, anything
            else selects gpt-4-turbo-preview.

    Yields:
        (next_token, content) pairs, where ``content`` is the accumulated
        response text so far.
    """
    # Queue carries streamed tokens from the LLM callback to this generator;
    # job_done is a sentinel marking the end of the stream.
    q = Queue()
    job_done = object()

    # Conversational retrieval agent construction: LLM, memory, prompt,
    # agent, and executor are built fresh for every request.
    if model_type == 1:
        llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-3.5-turbo-16k",
            streaming=True,
            callbacks=[QueueCallback(q)],
        )
    else:
        llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-4-turbo-preview",
            streaming=True,
            callbacks=[QueueCallback(q)],
        )

    # The same key is needed for both the memory and the prompt placeholder.
    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )
    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=False,
        return_intermediate_steps=True,
    )

    def task():
        # Run the agent in a worker thread. The finally guarantees the
        # sentinel is enqueued even if the agent raises, so the consumer
        # loop below can never spin forever (the original leaked the loop
        # on agent errors).
        try:
            agent_executor({"input": message})
        finally:
            q.put(job_done)

    Thread(target=task).start()

    content = ""
    # Drain tokens from the queue and yield them to the caller.
    while True:
        try:
            next_token = q.get(True, timeout=1)
        except Empty:
            # No token produced within the timeout; keep polling. Catching
            # only queue.Empty (not a bare except) keeps Ctrl-C working.
            continue
        if next_token is job_done:
            break
        content += next_token
        yield next_token, content


def ask_llm(message, history, model_type):
    """Gradio ChatInterface callback: stream the accumulated answer text."""
    for next_token, content in predict(message, model_type):
        yield content


# set up and run chat interface
kellogg_agent = gr.ChatInterface(
    fn=ask_llm,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Kellogg Course AI Assistant",
    description="Please provide your questions about courses offered by Kellogg.",
    additional_inputs=[model_type],
    additional_inputs_accordion_name="AI Assistant Options:",
    examples=[
        ["Can you tell me about a marketing major? What would I want from my career if I went that way instead of say strategy?"],
        ["I'm interested in strategy. Can you give me a recommendation of courses I should consider over the next year?"],
        ["I'm wanting to know more about advertising. Can you recommend some courses on that subject?"],
        ["How many credits do I need to graduate?"],
        ["I loved the Competitive Strategy and industrial structure class. Can you tell me others like that one?"],
    ],
    # cache_examples=True,
    # retry_btn=None,
    # undo_btn="Delete Previous",
    clear_btn="Clear",
)


def main():
    # Enable queuing so generator responses stream to the browser.
    kellogg_agent.queue().launch()


# start UI
if __name__ == "__main__":
    main()