# app.py — Kellogg Course AI Assistant
# Gradio chat UI backed by a LangChain retrieval agent over a Pinecone index.
# (Hugging Face Space revision f2f5170)
import os
from queue import Empty, Queue
from threading import Thread
from typing import Any

import gradio as gr
import openai
import pinecone
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain.vectorstores import Pinecone
print("CHECK - Pinecone vector db setup")
# set up OpenAI environment vars and embeddings
# NOTE(review): OpenAIEmbeddings() also reads OPENAI_API_KEY from the
# environment on its own; setting openai.api_key here covers direct SDK use.
openai.api_key = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()
# initialize pinecone db
# Name of the pre-built index holding embedded Kellogg site content.
index_name = "kellogg-course-assistant"
pinecone.init(
api_key=os.getenv("PINECONE_API_KEY"), # find at app.pinecone.io
environment=os.getenv("PINECONE_ENV"), # next to api key in console
)
# load existing index
# Wrap the existing Pinecone index as a LangChain vector store, then expose it
# as a retriever; `retriever` is consumed below by create_retriever_tool().
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
retriever = vectorsearch.as_retriever()
print("CHECK - setting up conversational retrieval agent")
# callback handler for streaming
class QueueCallback(BaseCallbackHandler):
    """Callback handler that streams LLM tokens into a queue.

    The queue is drained by the generator in ``predict()``, which yields
    tokens to the Gradio UI as they arrive.
    """

    def __init__(self, q: Queue) -> None:
        # Queue shared with the consumer loop in predict().
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Push each newly generated token onto the queue."""
        self.q.put(token)

    def on_llm_end(self, *args: Any, **kwargs: Any) -> None:
        """LLM run finished; nothing to do here.

        The end-of-run sentinel is enqueued by the worker thread in
        predict() after the agent completes, so this handler does not
        signal completion itself. (The original returned ``q.empty()``,
        a bool that contradicted the ``-> None`` annotation and was
        ignored by the caller anyway.)
        """
        return None
# create retrieval tool
# Tool the agent can call to search embedded Kellogg site content in Pinecone.
tool = create_retriever_tool(
retriever,
"search_kellogg_site",
"Searches and returns content from within the Kellogg website."
)
tools = [tool]
# System prompt steering the agent's persona and the shape of its answers.
system_message = SystemMessage(
content=(
"You are a helpful educational expert providing advice to students of the Northwestern business school Kellogg. "
"Use both your knowledge and the Kellogg site search tool to generate helpful answers for questions about courses and create a list of suggested web course articles for more information. "
"Do not include details of your intermediate steps in the final response. "
"At the end of your response, provide links to relevant web course articles returned by the retriever."
)
)
print("CHECK - setting up gradio chatbot UI")
# build Gradio selectable options in Chat UI
# type="index" means the callback receives the selected option's position:
# 0 = "gpt-4 + rag", 1 = "gpt-3.5-turbo + rag" (see predict()).
model_type=gr.Dropdown(choices=["gpt-4 + rag",
"gpt-3.5-turbo + rag"],
value="gpt-4 + rag",
type="index",
label="LLM Models"
)
# RAG agent function
def predict(message, model_type):
    """Run the retrieval agent for one user message, streaming its answer.

    Args:
        message: the user's question (plain text).
        model_type: dropdown index — 1 selects gpt-3.5-turbo, anything
            else (the default 0) selects gpt-4.

    Yields:
        ``(next_token, content)`` pairs: the newest token and the full
        accumulated answer so far.
    """
    # Queue carries streamed tokens from the LLM callback to this generator;
    # job_done is a unique sentinel marking the end of the agent run.
    q = Queue()
    job_done = object()
    # conversational retrieval agent construction: LLM, memory, prompt,
    # agent, and executor are rebuilt per request so memory starts fresh.
    if model_type == 1:
        llm = ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo-16k",
                         streaming=True, callbacks=[QueueCallback(q)])
    else:
        llm = ChatOpenAI(temperature=0.1, model_name="gpt-4-turbo-preview",
                         streaming=True, callbacks=[QueueCallback(q)])
    # memory_key must match between the memory object and the prompt placeholder.
    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )
    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory,
                                   verbose=False, return_intermediate_steps=True)

    # Worker thread runs the agent so we can stream tokens while it works.
    def task():
        agent_executor({"input": message})
        q.put(job_done)  # signal completion to the consumer loop below

    t = Thread(target=task)
    t.start()
    content = ""
    # Drain the queue, yielding each token until the sentinel arrives.
    while True:
        try:
            next_token = q.get(True, timeout=1)
        except Empty:
            # No token within the timeout. If the worker died before it
            # could enqueue the sentinel (e.g. the agent raised), stop
            # instead of spinning forever; otherwise keep waiting.
            # (The original bare `except: pass` swallowed everything,
            # including KeyboardInterrupt, and could loop indefinitely.)
            if not t.is_alive() and q.empty():
                break
            continue
        if next_token is job_done:
            break
        content += next_token
        yield next_token, content
def ask_llm(message, history, model_type):
    """Gradio ChatInterface adapter: yield the growing answer text.

    ChatInterface replaces the displayed reply with each yielded value, so
    we forward only the accumulated text and ignore the per-token part.
    The ``history`` argument is required by the ChatInterface signature
    but unused here — the agent keeps its own memory.
    """
    for _token, partial_answer in predict(message, model_type):
        yield partial_answer
# set up and run chat interface
# Chat UI wiring: ask_llm is a generator, so replies stream into the chatbot.
kellogg_agent = gr.ChatInterface(
fn=ask_llm,
chatbot=gr.Chatbot(height=500),
textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
title="Kellogg Course AI Assistant",
description="Please provide your questions about courses offered by Kellogg.",
# Extra inputs (the model dropdown) are passed to ask_llm after (message, history).
additional_inputs=[model_type],
additional_inputs_accordion_name="AI Assistant Options:",
# Clickable example questions shown under the textbox.
examples=[["Can you tell me about a marketing major? What would I want from my career if I went that way instead of say strategy?"],
["I'm interested in strategy. Can you give me a recommendation of courses I should consider over the next year?"],
["I'm wanting to know more about advertising. Can you recommend some courses on that subject?"],
["How many credits do I need to graduate?"],
["I loved the Competitive Strategy and industrial structure class. Can you tell me others like that one?"]],
# cache_examples=True,
# retry_btn=None,
# undo_btn="Delete Previous",
clear_btn="Clear",
)
def main():
    """Enable request queueing (needed for streaming) and launch the UI."""
    app = kellogg_agent.queue()
    app.launch()

# start UI
if __name__ == "__main__":
    main()