File size: 10,804 Bytes
e7466d2
 
3d591c2
d922281
3d591c2
d922281
 
e7466d2
 
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
3d591c2
e7466d2
 
d922281
e7466d2
 
d922281
 
 
e7466d2
d922281
 
 
c98aa49
 
 
 
d922281
 
 
 
 
 
c98aa49
 
 
 
 
 
d922281
 
e7466d2
d922281
 
b5e6b00
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9af69f5
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7466d2
 
d922281
 
e7466d2
d922281
 
e7466d2
d922281
 
e7466d2
d922281
 
 
 
 
 
e7466d2
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394f8ff
 
f72f7a9
394f8ff
 
 
 
 
d922281
 
 
 
 
 
 
f72f7a9
 
ae7c48f
6824001
f72f7a9
9d6880d
f72f7a9
 
 
 
 
ebe21e9
f72f7a9
 
 
 
155b610
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Import the necessary libraries
import streamlit as st
from openai import OpenAI
from pinecone import Pinecone
import os
import pandas as pd
import numpy as np



# Legacy key loading from local files, superseded by the environment
# variables read below; kept for reference.
#pinecone_key_file = "pinecone_api.txt"
#with open(pinecone_key_file, "r") as f:
#  for line in f:
#    PINECONE_KEY = line
#    break
  
# Module-level Pinecone client; requires PINECONE_KEY in the environment
# (Pinecone raises if the key is missing/invalid).
pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))

#with open('open_ai_key.txt', "r") as f:
#  for line in f:
#    OPENAI_KEY = line
#    break

# Module-level OpenAI client; requires OPENAI_KEY in the environment.
# NOTE(review): Head_Agent below builds its own client — confirm this
# global is still needed.
client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))


st.title("Seattle Pandas Super Duper ML Chatbot")


class Obnoxious_Agent:
    """Classifies a user query as obnoxious, ML-related, a greeting, or other.

    Uses a few-shot prompt against gpt-3.5-turbo and maps the model's
    free-text reply onto a small set of internal routing labels.
    """

    def __init__(self, client) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client=client

    def set_prompt(self, query):
        """Build the few-shot classification prompt for *query*."""
        prompt=f'''Is this query obnoxious, related to machine learning, or general greetings? 
        Answer "obnoxious" if it is an obnoxious query, answer "machine learning" if it is related to machine learning,
          "general greetings" if it is a general greeting, and "others" for all other queries. When considering whether
           a query is related to machine learning, be sure to pay attention to common machine learning acronyms (RNN, CNN, CV, GAN)
            and also consider topics from emerging fields like computer vision, deep learning, AI content generation, and others.
            Examples are included.
           
          "Query: You are stupid ; Answer: obnoxious"
          "Query: poop; Answer: obnoxious"
          "Query: kdkdkspapemrmn ; Answer: obnoxious"
          "Query: What is a random forest? ; Answer: machine learning"
          "Query: How to train a model using a GPU? ; Answer: machine learning"
          "Query: What is a CNN? ; Answer: machine learning"
          "Query: RNN? ; Answer: machine learning"
          "Query: Causal inference? ; Answer: machine learning"
          "Query: What is computer vision or CV? ; Answer: machine learning"

          "Query: How are you? ; Answer: general greetings"
          "Query: I like shoes; Answer: other"

          Query: {query}'''
        return prompt

    def extract_action(self, response) -> str:
        """Map the model's free-text classification to an internal label.

        Returns one of: 'obnoxious', 'gt' (general greeting), 'ml', 'other'.
        Substring checks run in priority order, so 'obnoxious' wins if the
        reply mentions several categories.
        """
        # Lowercase once instead of per-branch.
        text = response.lower()
        if 'obnoxious' in text:
            return 'obnoxious'
        elif 'general greetings' in text:
            return 'gt'
        elif 'machine learning' in text:
            return 'ml'
        else:
            return 'other'

    def check_query(self, query):
        """Classify *query* with one gpt-3.5-turbo call; return the label."""
        prompt=self.set_prompt(query)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return self.extract_action(response.choices[0].message.content)




class Query_Agent:
    """Retrieves relevant text chunks for a query from a Pinecone index.

    Pinecone match ids are row offsets into a local CSV of chunked source
    texts, so the actual chunk text is looked up locally after the query.
    """

    def __init__(self, client, index='llm-chatbot-index') -> None:
        # Assumes the named index already exists and has been onboarded.
        self.pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
        self.index = self.pc.Index(index)
        self.client = client
        # Local copy of the 250-char chunk texts, indexed by match id.
        self.df = pd.read_csv("text_embedding.csv")
        self.texts_size_250 = np.array(self.df['Text'])

    def get_embedding(self, text, model="text-embedding-ada-002"):
        """Embed *text* with OpenAI, collapsing newlines to spaces first."""
        cleaned = text.replace("\n", " ")
        result = self.client.embeddings.create(input=[cleaned], model=model)
        return result.data[0].embedding

    def send_pinecone_query(self, query, top_k=5, namespace="250_chunk"):
        """Query the Pinecone vector store; return the raw top-k response."""
        return self.index.query(vector=query, top_k=top_k, namespace=namespace)

    def query_vector_store(self, query, top_k=5):
        """Return (concatenated chunk texts, mean similarity score) for *query*."""
        embedding = self.get_embedding(query)
        matches = self.send_pinecone_query(embedding, top_k)["matches"]
        total_score = 0
        pieces = []
        for match in matches:
            total_score += float(match['score'])
            pieces.append(self.texts_size_250[int(match['id'])])
            pieces.append('\n')
        return "".join(pieces), total_score / top_k

    def set_prompt(self, query, context):
        """Build a prompt asking the model to explain *query* from *context*."""
        return "Given the following context, explain " + query + ": " + context

    def extract_action(self, response, query = None):
        """Return a grounded prompt for *query*, or 'non-relevant' when the
        mean similarity falls below the 0.3 threshold."""
        context, avg_score = self.query_vector_store(query)
        if avg_score < 0.3:
            return 'non-relevant'
        return self.set_prompt(query, context)
        

class Answering_Agent:
    """Generates the final user-facing answer in the configured style."""

    def __init__(self, openai_client, mode) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client = openai_client
        # Response style, interpolated directly into the prompt
        # (e.g. 'concise', 'verbose', 'shakespearean').
        self.mode = mode

    def generate_response(self, query, docs, conv_history, k=5):
        """Answer *query* using retrieved *docs* and the chat history.

        Returns the model's reply text. NOTE(review): *k* is accepted but
        unused — confirm whether it was meant to cap docs/history.
        """
        prompt = f'''You are a {self.mode} chatbot. Answer all queries in a {self.mode} style.
          I will provide a user query you must answer, relevant documents which you
            should reference in your answer, and conversation history which you should
            refer to for context.
            Query: {query}
            Conversation History: {conv_history}
            Relevant Documents: {docs}
          '''
        message = {"role": "user", "content": prompt}
        # Bug fix: use the injected client rather than the module-level
        # global `client`, so the agent works with whatever client it was
        # constructed with.
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return response.choices[0].message.content


class Relevant_Documents_Agent:
    """Judges whether retrieved documents actually answer the user's query."""

    def __init__(self, client) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client = client

    def get_relevance(self, query, documents, cosine_similarity) -> str:
        """Ask the LLM for a relevant / non-relevant verdict and return it.

        The mean cosine similarity is included in the prompt as extra
        signal for the model's decision.
        """
        relevance_prompt = f'''Based on the following query, please decide if the following
          documents are relevant to this query. For context, the average cosine similarity of these documents to this
          query is {cosine_similarity}. Your response must be one of the two following [relevant, non-relevant].
          
          Query: {query}
          Documents: {documents}'''

        # Debug trace of the exact prompt sent to the model.
        print("USER PROMPT:", relevance_prompt)

        completion = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": relevance_prompt}]
        )

        verdict = completion.choices[0].message.content
        print("Relevance: ", verdict)
        return verdict


class Head_Agent:
    """Top-level orchestrator: builds the sub-agents and drives the
    Streamlit chat loop (obnoxious check -> retrieval -> relevance -> answer)."""

    def __init__(self, mode) -> None:
        # Each Head_Agent owns one OpenAI client, shared by all sub-agents.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
        # NOTE(review): lowercase modes here, but the selectbox at the bottom
        # of the file offers ('Concise', 'Chatty', 'Shakespearean') — confirm
        # the two lists are meant to agree.
        self.possible_modes = ['verbose', 'concise', 'shakespearean']
        self.mode = mode 
        self.setup_sub_agents()

        # Greeting rendered on every Streamlit rerun.
        with st.chat_message("assistant"):
            st.write(f"Welcome to your {self.mode} chatbot!")

    def setup_sub_agents(self):
        """Instantiate the four sub-agents, all sharing this agent's client."""
        self.obnoxious_agent=Obnoxious_Agent(self.client)
        self.query_agent=Query_Agent(self.client)
        self.answering_agent = Answering_Agent(self.client, self.mode)
        self.relevance_agent = Relevant_Documents_Agent(self.client)
    
    def evaluate_mode(self, query):
        """Classify *query* into one of self.possible_modes via the LLM.

        NOTE(review): not called anywhere in this file — possibly dead code.
        """
        prompt=f'''Classify the following query to see if it most closely matches
          an item in this list {self.possible_modes}. 
          Your response MUST be a single word from that list only. Query: {query}'''
        # print(prompt)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        # print(response)
        return response.choices[0].message.content


    def main_loop(self):
        """Render chat history, then route one user turn through the pipeline."""
        # Default model recorded in session state; not read elsewhere in this
        # file — presumably for future use, TODO confirm.
        if "openai_model" not in st.session_state:
            st.session_state["openai_model"] = "gpt-3.5-turbo"

        if "messages" not in st.session_state:
            st.session_state.messages = []

        # Replay prior turns so the conversation survives Streamlit reruns.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])


        if prompt := st.chat_input("Hi, how can I help you?"):
            st.session_state.messages.append({"role": "user", "content": prompt})

            with st.chat_message("user"):
                st.markdown(prompt)
            
            # first check if prompt is obnoxious
            obnoxious=self.obnoxious_agent.check_query(prompt)
            print(obnoxious)
            if obnoxious=='obnoxious':
                with st.chat_message("assistant"):
                    response="Please refrain from obnoxious questions."
                    st.write(response)
            elif obnoxious=='gt':
                # 'gt' = general greeting label from Obnoxious_Agent.
                with st.chat_message("assistant"):
                    response="How can I assist you today?"
                    st.write(response)
            # elif obnoxious == 'other':
                #with st.chat_message("assistant"):
                #    response="I can only answer questions about machine learning!"
                #    st.write(response)  
            else:
                #next check if prompt if relevant
                docs, cosine_similarity = self.query_agent.query_vector_store(prompt)
                response=self.relevance_agent.get_relevance(prompt, docs, cosine_similarity)
                # make this be relevance agent

                if 'non-relevant' in response.lower():
                    # ML-classified queries still get an answer (without docs);
                    # everything else is refused.
                    if obnoxious == 'ml':
                        with st.chat_message("assistant"):
                            response = self.answering_agent.generate_response(prompt, '', st.session_state['messages'])
                            st.write(response)
                    else:
                        with st.chat_message("assistant"):
                            response="Please ask questions only related to Machine Learning!"
                            st.write(response)
                else:
                    with st.chat_message("assistant"):
                        response = self.answering_agent.generate_response(prompt, docs, st.session_state['messages'])
                        st.write(response)
            # Persist whichever assistant reply was produced above.
            st.session_state.messages.append({"role": "assistant", "content": response})


# Default chatbot mode on the first run of the script.
if "mode" not in st.session_state:
    st.session_state.mode = "Concise"

# The agent is constructed before the selectbox below renders, so the first
# run always uses the default mode.
head_agent=Head_Agent(st.session_state.mode)

def set_mode():
    # Rebuild only the answering agent; the other sub-agents don't use mode.
    # NOTE(review): on_change callbacks run before the assignment below is
    # re-executed, so this may read the previous selection — verify.
    head_agent.answering_agent = Answering_Agent(head_agent.client, st.session_state.mode)


st.session_state.mode = st.selectbox(
    'What kind of chatbot would you like today?',
    ('Concise', 'Chatty', 'Shakespearean'), on_change=set_mode)

st.write('You selected:', st.session_state.mode)


head_agent.main_loop()