File size: 5,443 Bytes
482c177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abe7873
482c177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9048350
482c177
f2f5170
482c177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os
from queue import Empty, Queue
from threading import Thread
from typing import Any

import gradio as gr
import pinecone
import openai

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Pinecone

from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool

from langchain.callbacks.base import BaseCallbackHandler

print("CHECK - Pinecone vector db setup")

# The OpenAI key is read from the environment before the embeddings client
# is constructed.
openai.api_key = os.getenv("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()

# Name of the Pinecone index queried by the retriever below.
index_name = "kellogg-course-assistant"

# Connect to Pinecone. Both values are listed in the console at
# app.pinecone.io (the environment is shown next to the API key).
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

# Attach to the existing index and expose it through the retriever interface.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)
retriever = vectorsearch.as_retriever()

print("CHECK - setting up conversational retrieval agent")

# callback handler for streaming
class QueueCallback(BaseCallbackHandler):
    """Callback handler that forwards streamed LLM tokens to a queue.

    A consumer reads the tokens back off the same queue to stream the
    response as it is generated.
    """

    def __init__(self, q):
        """Store the queue that receives tokens.

        Args:
            q: Queue-like object exposing ``put``; every token passed to
                ``on_llm_new_token`` is pushed onto it.
        """
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Push a newly generated token onto the queue."""
        self.q.put(token)

    def on_llm_end(self, *args: Any, **kwargs: Any) -> None:
        """Do nothing when an LLM call finishes.

        The previous implementation returned ``self.q.empty()``, which only
        reports whether the queue is empty (it does not clear it) and
        contradicted the ``-> None`` annotation. Tokens still waiting in the
        queue must remain there for the consumer, so nothing is done here.
        """
        return None

# Retrieval tool handed to the agent: wraps the retriever under a name and
# description.
tool = create_retriever_tool(
    retriever,
    "search_kellogg_site",
    "Searches and returns content from within the Kellogg website.",
)
tools = [tool]

# System prompt for the agent. The sentences are joined with single spaces,
# producing one continuous instruction string.
system_message = SystemMessage(
    content=" ".join(
        [
            "You are a helpful educational expert providing advice to students of the Northwestern business school Kellogg.",
            "Use both your knowledge and the Kellogg site search tool to generate helpful answers for questions about courses and create a list of suggested web course articles for more information.",
            "Do not include details of your intermediate steps in the final response.",
            "At the end of your response, provide links to relevant web course articles returned by the retriever.",
        ]
    )
)

print("CHECK - setting up gradio chatbot UI")

# Model selector shown in the chat UI. With type="index" the component passes
# the position of the selected choice (0 or 1) instead of its label; predict()
# compares that value against 1.
model_type = gr.Dropdown(
    choices=["gpt-4 + rag", "gpt-3.5-turbo + rag"],
    value="gpt-4 + rag",
    type="index",
    label="LLM Models",
)

# RAG agent function
def predict(message, model_type):
    """Stream the retrieval agent's answer to ``message`` token by token.

    A fresh agent (LLM, memory, prompt, executor) is built for every call,
    run on a worker thread, and its streamed tokens are relayed through a
    queue to this generator.

    Args:
        message: The user's question, passed to the agent as ``input``.
        model_type: Index selected in the model dropdown. ``1`` selects
            ``gpt-3.5-turbo-16k``; any other value selects
            ``gpt-4-turbo-preview``.

    Yields:
        ``(next_token, content)`` tuples, where ``content`` is all text
        received so far, including ``next_token``.

    Raises:
        Exception: Whatever the agent run raised on the worker thread,
            re-raised here after the tokens streamed before the failure
            have been yielded.
    """
    q = Queue()
    # Unique sentinel the worker puts on the queue when the run is over.
    job_done = object()

    # Select the chat model; both variants stream tokens into ``q``.
    model_name = "gpt-3.5-turbo-16k" if model_type == 1 else "gpt-4-turbo-preview"
    llm = ChatOpenAI(
        temperature=0.1,
        model_name=model_name,
        streaming=True,
        callbacks=[QueueCallback(q)],
    )

    # The same key is needed by both the memory and the prompt placeholder.
    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )

    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=False,
        return_intermediate_steps=True,
    )

    # Failure raised on the worker thread, handed back to the consumer.
    errors = []

    def task():
        # Runs on the worker thread. The sentinel is queued in ``finally`` so
        # the consumer loop below always terminates, even if the agent fails.
        try:
            agent_executor({"input": message})
        except Exception as exc:
            errors.append(exc)
        finally:
            q.put(job_done)

    t = Thread(target=task)
    t.start()

    content = ""

    # Relay tokens until the sentinel arrives.
    while True:
        try:
            next_token = q.get(True, timeout=1)
        except Empty:
            # Nothing arrived within the timeout; keep waiting. Only ``Empty``
            # is caught, and the ``yield`` sits outside the ``try``, so
            # GeneratorExit and KeyboardInterrupt are no longer swallowed.
            continue
        if next_token is job_done:
            break
        if not isinstance(next_token, str):
            # The old bare ``except`` silently skipped tokens that could not
            # be appended to the text; keep that best-effort behaviour.
            continue
        content += next_token
        yield next_token, content

    if errors:
        raise errors[0]

def ask_llm(message, history, model_type):
    """Chat callback that streams the progressively growing answer text.

    Args:
        message: The user's question.
        history: Conversation history supplied by the caller; not used here.
        model_type: Model selection forwarded unchanged to ``predict``.

    Yields:
        The text accumulated so far, once per item produced by ``predict``.
    """
    for _token, accumulated in predict(message, model_type):
        yield accumulated

# set up and run chat interface
kellogg_agent = gr.ChatInterface(
    fn=ask_llm,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Kellogg Course AI Assistant",
    description="Please provide your questions about courses offered by Kellogg.",
    # The model dropdown is passed to ask_llm as an extra argument.
    additional_inputs=[model_type],
    additional_inputs_accordion_name="AI Assistant Options:",
    examples=[
        ["Can you tell me about a marketing major? What would I want from my career if I went that way instead of say strategy?"],
        ["I'm interested in strategy. Can you give me a recommendation of courses I should consider over the next year?"],
        ["I'm wanting to know more about advertising. Can you recommend some courses on that subject?"],
        ["How many credits do I need to graduate?"],
        ["I loved the Competitive Strategy and industrial structure class. Can you tell me others like that one?"],
    ],
    # Options currently left disabled:
    #   cache_examples=True
    #   retry_btn=None
    #   undo_btn="Delete Previous"
    clear_btn="Clear",
)

def main():
    """Enable queuing on the chat interface and launch it."""
    queued_app = kellogg_agent.queue()
    queued_app.launch()


# Launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    main()