# app.py — Kellogg Course AI Assistant
# Gradio chat UI backed by a LangChain retrieval agent over a Pinecone index.
# (Hugging Face Space revision f2f5170)
import os
from queue import Empty, Queue
from threading import Thread
from typing import Any

import gradio as gr
import openai
import pinecone
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain.vectorstores import Pinecone
print("CHECK - Pinecone vector db setup")
# set up OpenAI environment vars and embeddings
# NOTE(review): OpenAIEmbeddings() also reads OPENAI_API_KEY from the
# environment on its own; setting openai.api_key here covers direct SDK use.
openai.api_key = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()
# initialize pinecone db
# Name of the pre-built index holding embedded Kellogg site content.
index_name = "kellogg-course-assistant"
pinecone.init(
api_key=os.getenv("PINECONE_API_KEY"), # find at app.pinecone.io
environment=os.getenv("PINECONE_ENV"), # next to api key in console
)
# load existing index
# Wrap the existing Pinecone index as a LangChain vector store, then expose it
# as a retriever; `retriever` is consumed below by create_retriever_tool().
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
retriever = vectorsearch.as_retriever()
print("CHECK - setting up conversational retrieval agent")
# callback handler for streaming
class QueueCallback(BaseCallbackHandler):
    """Callback handler that streams LLM tokens into a queue.

    The queue is drained by the generator in ``predict()``, which yields
    tokens to the Gradio UI as they arrive.
    """

    def __init__(self, q: Queue) -> None:
        # Queue shared with the consumer loop in predict().
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Push each newly generated token onto the queue."""
        self.q.put(token)

    def on_llm_end(self, *args: Any, **kwargs: Any) -> None:
        """LLM run finished; nothing to do here.

        The end-of-run sentinel is enqueued by the worker thread in
        predict() after the agent completes, so this handler does not
        signal completion itself. (The original returned ``q.empty()``,
        a bool that contradicted the ``-> None`` annotation and was
        ignored by the caller anyway.)
        """
        return None
# create retrieval tool
# Tool the agent can call to search embedded Kellogg site content in Pinecone.
tool = create_retriever_tool(
retriever,
"search_kellogg_site",
"Searches and returns content from within the Kellogg website."
)
tools = [tool]
# System prompt steering the agent's persona and the shape of its answers.
system_message = SystemMessage(
content=(
"You are a helpful educational expert providing advice to students of the Northwestern business school Kellogg. "
"Use both your knowledge and the Kellogg site search tool to generate helpful answers for questions about courses and create a list of suggested web course articles for more information. "
"Do not include details of your intermediate steps in the final response. "
"At the end of your response, provide links to relevant web course articles returned by the retriever."
)
)
print("CHECK - setting up gradio chatbot UI")
# build Gradio selectable options in Chat UI
# type="index" means the callback receives the selected option's position:
# 0 = "gpt-4 + rag", 1 = "gpt-3.5-turbo + rag" (see predict()).
model_type=gr.Dropdown(choices=["gpt-4 + rag",
"gpt-3.5-turbo + rag"],
value="gpt-4 + rag",
type="index",
label="LLM Models"
)
# RAG agent function
def predict(message, model_type):
    """Run the retrieval agent for one user message, streaming its answer.

    Args:
        message: the user's question (plain text).
        model_type: dropdown index — 1 selects gpt-3.5-turbo, anything
            else (the default 0) selects gpt-4.

    Yields:
        ``(next_token, content)`` pairs: the newest token and the full
        accumulated answer so far.
    """
    # Queue carries streamed tokens from the LLM callback to this generator;
    # job_done is a unique sentinel marking the end of the agent run.
    q = Queue()
    job_done = object()
    # conversational retrieval agent construction: LLM, memory, prompt,
    # agent, and executor are rebuilt per request so memory starts fresh.
    if model_type == 1:
        llm = ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo-16k",
                         streaming=True, callbacks=[QueueCallback(q)])
    else:
        llm = ChatOpenAI(temperature=0.1, model_name="gpt-4-turbo-preview",
                         streaming=True, callbacks=[QueueCallback(q)])
    # memory_key must match between the memory object and the prompt placeholder.
    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )
    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory,
                                   verbose=False, return_intermediate_steps=True)

    # Worker thread runs the agent so we can stream tokens while it works.
    def task():
        agent_executor({"input": message})
        q.put(job_done)  # signal completion to the consumer loop below

    t = Thread(target=task)
    t.start()
    content = ""
    # Drain the queue, yielding each token until the sentinel arrives.
    while True:
        try:
            next_token = q.get(True, timeout=1)
        except Empty:
            # No token within the timeout. If the worker died before it
            # could enqueue the sentinel (e.g. the agent raised), stop
            # instead of spinning forever; otherwise keep waiting.
            # (The original bare `except: pass` swallowed everything,
            # including KeyboardInterrupt, and could loop indefinitely.)
            if not t.is_alive() and q.empty():
                break
            continue
        if next_token is job_done:
            break
        content += next_token
        yield next_token, content
def ask_llm(message, history, model_type):
    """Gradio ChatInterface adapter: yield the growing answer text.

    ChatInterface replaces the displayed reply with each yielded value, so
    we forward only the accumulated text and ignore the per-token part.
    The ``history`` argument is required by the ChatInterface signature
    but unused here — the agent keeps its own memory.
    """
    for _token, partial_answer in predict(message, model_type):
        yield partial_answer
# set up and run chat interface
# Chat UI wiring: ask_llm is a generator, so replies stream into the chatbot.
kellogg_agent = gr.ChatInterface(
fn=ask_llm,
chatbot=gr.Chatbot(height=500),
textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
title="Kellogg Course AI Assistant",
description="Please provide your questions about courses offered by Kellogg.",
# Extra inputs (the model dropdown) are passed to ask_llm after (message, history).
additional_inputs=[model_type],
additional_inputs_accordion_name="AI Assistant Options:",
# Clickable example questions shown under the textbox.
examples=[["Can you tell me about a marketing major? What would I want from my career if I went that way instead of say strategy?"],
["I'm interested in strategy. Can you give me a recommendation of courses I should consider over the next year?"],
["I'm wanting to know more about advertising. Can you recommend some courses on that subject?"],
["How many credits do I need to graduate?"],
["I loved the Competitive Strategy and industrial structure class. Can you tell me others like that one?"]],
# cache_examples=True,
# retry_btn=None,
# undo_btn="Delete Previous",
clear_btn="Clear",
)
def main():
    """Enable request queueing (needed for streaming) and launch the UI."""
    app = kellogg_agent.queue()
    app.launch()

# start UI
if __name__ == "__main__":
    main()