File size: 5,846 Bytes
c3d1724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998bbdc
 
 
 
 
 
 
 
 
c3d1724
 
 
 
 
 
 
 
 
 
998bbdc
 
c3d1724
 
998bbdc
 
 
 
 
 
 
c3d1724
998bbdc
 
 
c3d1724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
import os
import pinecone
import openai

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Pinecone

from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool

# Function to read files from a folder
def read_files_from_folder(folder_path):
    """Read every .txt file in *folder_path* into a dict.

    Args:
        folder_path: Directory to scan (non-recursive).

    Returns:
        dict mapping filename -> full file contents for each ``.txt`` file.

    Raises:
        FileNotFoundError: if *folder_path* does not exist.
    """
    file_data = {}
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):  # Assuming text files
            # BUG FIX: specify encoding explicitly; the platform default
            # (e.g. cp1252 on Windows) can corrupt or fail on UTF-8 content.
            with open(os.path.join(folder_path, filename), 'r', encoding="utf-8") as f:
                file_data[filename] = f.read()
    return file_data

print("CHECK - Pinecone vector db setup")

# set up OpenAI environment vars and embeddings
openai.api_key = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings()

# initialize pinecone db
index_name = "kellogg-course-assistant"

pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV"),
)

# Read files from the "kellogg" folder into a dictionary
kellogg_data = read_files_from_folder("kellogg")

# Connect to the existing Pinecone index via the LangChain vectorstore wrapper.
vectorsearch = Pinecone.from_existing_index(index_name, embeddings)

# BUG FIX: OpenAIEmbeddings has no .transform() method, and the LangChain
# Pinecone vectorstore has no .upsert(items=...) — the original code raised
# AttributeError before the app ever started. add_texts() embeds each
# document with `embeddings` and upserts the vectors in one call; the source
# filename is attached as metadata so retrieved chunks can be traced back.
vectorsearch.add_texts(
    texts=list(kellogg_data.values()),
    metadatas=[{"source": name} for name in kellogg_data],
)

# expose the populated index as a retriever for the agent's search tool
retriever = vectorsearch.as_retriever()

print("CHECK - setting up conversational retrieval agent")

# create LLMs: GPT-4 is the default; GPT-3.5-turbo-16k is the cheaper,
# larger-context alternative selectable from the UI dropdown.
llm4 = ChatOpenAI(temperature=0.1, model_name="gpt-4")
llm35 = ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo-16k")
llm = llm4

# create retrieval tool: lets the agent query the Pinecone-backed retriever
tool = create_retriever_tool(
    retriever,
    "search_kellogg_site",
    "Searches and returns content from within the Kellogg website."
)
tools = [tool]

# conversational retrieval agent component construction - memory, prompt template, agent, agent executor
# This key is shared by the memory buffer and the prompt placeholder below.
memory_key = "history"
memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

system_message = SystemMessage(
        content=(
            "You are a helpful educational expert providing advice to students of the Northwestern business school Kellogg. "
            "Use both your knowledge and the Kellogg site search tool to generate helpful answers for questions about courses and providing a list of suggested web course articles for more information. "
            # BUG FIX: the italic example previously read "<i>italic<i>" — an
            # unclosed tag that teaches the model to emit broken HTML.
            "Format your answer with distinct <h3>titles</h3> and <h3>subtitles</h3>, <b>emphasis</b>, <b>bold</b>, <i>italic</i>, <li>lists</li>, and tables *use html code*.  For lists, or bullet points, always start them by having a topic in <b>emphasis</b> before going into the description. Ensure to frequently take concepts and break them down into bullet points or lists following the emphasis directions that were just laid out."
            "Do not include details of your intermediate steps in the final response. "
            "At the end of your response, provide links to relevant web course articles returned by the retriever."
        )
)

# prompt = system message + a placeholder where conversation history is injected
prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)]
)

agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True, return_intermediate_steps=True)

print("CHECK - setting up gradio chatbot UI")

# build Gradio selectable options in Chat UI
# type="index" makes the dropdown pass the selected *position* (0 or 1) to
# the callback, which predict() compares against below — the choice labels
# themselves are never passed.
model_type=gr.Dropdown(choices=["gpt-4 + rag", 
								"gpt-3.5-turbo + rag"], 
								value="gpt-4 + rag",
								type="index",
								label="LLM Models"
)

# gradio chatbot UI callback
def predict(message, history, model_type):
    """Answer *message* with the conversational retrieval agent.

    Args:
        message: The user's question.
        history: Chat history managed by gr.ChatInterface; unused here
            because the agent keeps its own memory (cleared per call).
        model_type: Dropdown index — 0 selects gpt-4, 1 selects
            gpt-3.5-turbo-16k, anything else falls back to gpt-4.

    Returns:
        The agent's final answer string.
    """
    # clearing RAG memory so each Gradio turn starts fresh
    memory.clear()

    # BUG FIX: the original assigned a *local* variable `llm` that was never
    # read, so the dropdown selection had no effect — the module-level
    # agent_executor always ran gpt-4. Build an executor around the chosen
    # model instead, reusing the module-level tools/prompt/memory.
    chosen_llm = llm35 if model_type == 1 else llm4
    executor = AgentExecutor(
        agent=OpenAIFunctionsAgent(llm=chosen_llm, tools=tools, prompt=prompt),
        tools=tools,
        memory=memory,
        verbose=True,
        return_intermediate_steps=True,
    )

    llm_response = executor({"input": message})
    return llm_response["output"]

# set up and run chat interface
# Wires predict() into a Gradio chat UI; the model dropdown is exposed via
# additional_inputs so its value arrives as predict()'s third argument.
kellogg_agent = gr.ChatInterface(
	fn=predict,
	chatbot=gr.Chatbot(height=500),
	textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
	title="Kellogg Course AI Assistant",
	description="Please provide your questions about courses offered by Kellogg.",
	additional_inputs=[model_type],
	# NOTE(review): additional_inputs_accordion_name is deprecated in newer
	# Gradio releases in favor of additional_inputs_accordion — confirm the
	# pinned gradio version still accepts it.
	additional_inputs_accordion_name="AI Assistant Options:",
	examples=[["Can you tell me about a marketing major?"],
        ["What would I want from my career if I went towards marketing instead of strategy?"],
        ["I'm interested in strategy. Can you give me a recommendation of courses I should consider over the next year?"],
		["I'm wanting to know more about advertising. Can you recommend some courses on that subject?"],
		["How many credits do I need to graduate?"],
		["I loved the Competitive Strategy and industrial structure class. Can you tell me others like that one?"]],
#    cache_examples=True,
#    retry_btn=None,
	undo_btn="Delete Previous",
	clear_btn="Clear",
)

# Basic-auth credentials for the Gradio app, read from the environment.
# NOTE(review): if either var is unset, auth becomes (None, None) — launch
# may fail or silently disable auth; verify both are set in deployment.
user_cred = os.environ.get("USER_CRED")
pass_cred = os.environ.get("PASS_CRED")

# start UI
if __name__ == "__main__":
	# queue() enables request queuing so long-running agent calls don't block
	kellogg_agent.queue().launch(auth=(user_cred, pass_cred))