File size: 10,804 Bytes
e7466d2
 
3d591c2
d922281
3d591c2
d922281
 
e7466d2
 
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
3d591c2
e7466d2
 
d922281
e7466d2
 
d922281
 
 
e7466d2
d922281
 
 
c98aa49
 
 
 
d922281
 
 
 
 
 
c98aa49
 
 
 
 
 
d922281
 
e7466d2
d922281
 
b5e6b00
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9af69f5
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7466d2
 
d922281
 
e7466d2
d922281
 
e7466d2
d922281
 
e7466d2
d922281
 
 
 
 
 
e7466d2
d922281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394f8ff
 
f72f7a9
394f8ff
 
 
 
 
d922281
 
 
 
 
 
 
f72f7a9
 
ae7c48f
6824001
f72f7a9
9d6880d
f72f7a9
 
 
 
 
ebe21e9
f72f7a9
 
 
 
155b610
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Import the necessary libraries
import streamlit as st
from openai import OpenAI
from pinecone import Pinecone
import os
import pandas as pd
import numpy as np



# Legacy key loading from local files, superseded by the environment
# variables read below; kept for reference.
#pinecone_key_file = "pinecone_api.txt"
#with open(pinecone_key_file, "r") as f:
#  for line in f:
#    PINECONE_KEY = line
#    break
  
# Module-level Pinecone client; requires PINECONE_KEY in the environment
# (Pinecone raises if the key is missing/invalid).
pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))

#with open('open_ai_key.txt', "r") as f:
#  for line in f:
#    OPENAI_KEY = line
#    break

# Module-level OpenAI client; requires OPENAI_KEY in the environment.
# NOTE(review): Head_Agent below builds its own client — confirm this
# global is still needed.
client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))


st.title("Seattle Pandas Super Duper ML Chatbot")


class Obnoxious_Agent:
    """Classifies a user query as obnoxious, ML-related, a greeting, or other.

    Uses a few-shot prompt against gpt-3.5-turbo and maps the model's
    free-text reply onto a small set of internal routing labels.
    """

    def __init__(self, client) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client=client

    def set_prompt(self, query):
        """Build the few-shot classification prompt for *query*."""
        prompt=f'''Is this query obnoxious, related to machine learning, or general greetings? 
        Answer "obnoxious" if it is an obnoxious query, answer "machine learning" if it is related to machine learning,
          "general greetings" if it is a general greeting, and "others" for all other queries. When considering whether
           a query is related to machine learning, be sure to pay attention to common machine learning acronyms (RNN, CNN, CV, GAN)
            and also consider topics from emerging fields like computer vision, deep learning, AI content generation, and others.
            Examples are included.
           
          "Query: You are stupid ; Answer: obnoxious"
          "Query: poop; Answer: obnoxious"
          "Query: kdkdkspapemrmn ; Answer: obnoxious"
          "Query: What is a random forest? ; Answer: machine learning"
          "Query: How to train a model using a GPU? ; Answer: machine learning"
          "Query: What is a CNN? ; Answer: machine learning"
          "Query: RNN? ; Answer: machine learning"
          "Query: Causal inference? ; Answer: machine learning"
          "Query: What is computer vision or CV? ; Answer: machine learning"

          "Query: How are you? ; Answer: general greetings"
          "Query: I like shoes; Answer: other"

          Query: {query}'''
        return prompt

    def extract_action(self, response) -> str:
        """Map the model's free-text classification to an internal label.

        Returns one of: 'obnoxious', 'gt' (general greeting), 'ml', 'other'.
        Substring checks run in priority order, so 'obnoxious' wins if the
        reply mentions several categories.
        """
        # Lowercase once instead of per-branch.
        text = response.lower()
        if 'obnoxious' in text:
            return 'obnoxious'
        elif 'general greetings' in text:
            return 'gt'
        elif 'machine learning' in text:
            return 'ml'
        else:
            return 'other'

    def check_query(self, query):
        """Classify *query* with one gpt-3.5-turbo call; return the label."""
        prompt=self.set_prompt(query)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return self.extract_action(response.choices[0].message.content)




class Query_Agent:
    """Retrieves relevant text chunks for a query from a Pinecone index.

    Pinecone match ids are row offsets into a local CSV of chunked source
    texts, so the actual chunk text is looked up locally after the query.
    """

    def __init__(self, client, index='llm-chatbot-index') -> None:
        # Assumes the named index already exists and has been onboarded.
        self.pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
        self.index = self.pc.Index(index)
        self.client = client
        # Local copy of the 250-char chunk texts, indexed by match id.
        self.df = pd.read_csv("text_embedding.csv")
        self.texts_size_250 = np.array(self.df['Text'])

    def get_embedding(self, text, model="text-embedding-ada-002"):
        """Embed *text* with OpenAI, collapsing newlines to spaces first."""
        cleaned = text.replace("\n", " ")
        result = self.client.embeddings.create(input=[cleaned], model=model)
        return result.data[0].embedding

    def send_pinecone_query(self, query, top_k=5, namespace="250_chunk"):
        """Query the Pinecone vector store; return the raw top-k response."""
        return self.index.query(vector=query, top_k=top_k, namespace=namespace)

    def query_vector_store(self, query, top_k=5):
        """Return (concatenated chunk texts, mean similarity score) for *query*."""
        embedding = self.get_embedding(query)
        matches = self.send_pinecone_query(embedding, top_k)["matches"]
        total_score = 0
        pieces = []
        for match in matches:
            total_score += float(match['score'])
            pieces.append(self.texts_size_250[int(match['id'])])
            pieces.append('\n')
        return "".join(pieces), total_score / top_k

    def set_prompt(self, query, context):
        """Build a prompt asking the model to explain *query* from *context*."""
        return "Given the following context, explain " + query + ": " + context

    def extract_action(self, response, query = None):
        """Return a grounded prompt for *query*, or 'non-relevant' when the
        mean similarity falls below the 0.3 threshold."""
        context, avg_score = self.query_vector_store(query)
        if avg_score < 0.3:
            return 'non-relevant'
        return self.set_prompt(query, context)
        

class Answering_Agent:
    """Generates the final user-facing answer in the configured style."""

    def __init__(self, openai_client, mode) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client = openai_client
        # Response style, interpolated directly into the prompt
        # (e.g. 'concise', 'verbose', 'shakespearean').
        self.mode = mode

    def generate_response(self, query, docs, conv_history, k=5):
        """Answer *query* using retrieved *docs* and the chat history.

        Returns the model's reply text. NOTE(review): *k* is accepted but
        unused — confirm whether it was meant to cap docs/history.
        """
        prompt = f'''You are a {self.mode} chatbot. Answer all queries in a {self.mode} style.
          I will provide a user query you must answer, relevant documents which you
            should reference in your answer, and conversation history which you should
            refer to for context.
            Query: {query}
            Conversation History: {conv_history}
            Relevant Documents: {docs}
          '''
        message = {"role": "user", "content": prompt}
        # Bug fix: use the injected client rather than the module-level
        # global `client`, so the agent works with whatever client it was
        # constructed with.
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return response.choices[0].message.content


class Relevant_Documents_Agent:
    """Judges whether retrieved documents actually answer the user's query."""

    def __init__(self, client) -> None:
        # Injected OpenAI client (chat.completions interface).
        self.client = client

    def get_relevance(self, query, documents, cosine_similarity) -> str:
        """Ask the LLM for a relevant / non-relevant verdict and return it.

        The mean cosine similarity is included in the prompt as extra
        signal for the model's decision.
        """
        relevance_prompt = f'''Based on the following query, please decide if the following
          documents are relevant to this query. For context, the average cosine similarity of these documents to this
          query is {cosine_similarity}. Your response must be one of the two following [relevant, non-relevant].
          
          Query: {query}
          Documents: {documents}'''

        # Debug trace of the exact prompt sent to the model.
        print("USER PROMPT:", relevance_prompt)

        completion = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": relevance_prompt}]
        )

        verdict = completion.choices[0].message.content
        print("Relevance: ", verdict)
        return verdict


class Head_Agent:
    """Top-level orchestrator: builds the sub-agents and drives the
    Streamlit chat loop (obnoxious check -> retrieval -> relevance -> answer)."""

    def __init__(self, mode) -> None:
        # Each Head_Agent owns one OpenAI client, shared by all sub-agents.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
        # NOTE(review): lowercase modes here, but the selectbox at the bottom
        # of the file offers ('Concise', 'Chatty', 'Shakespearean') — confirm
        # the two lists are meant to agree.
        self.possible_modes = ['verbose', 'concise', 'shakespearean']
        self.mode = mode 
        self.setup_sub_agents()

        # Greeting rendered on every Streamlit rerun.
        with st.chat_message("assistant"):
            st.write(f"Welcome to your {self.mode} chatbot!")

    def setup_sub_agents(self):
        """Instantiate the four sub-agents, all sharing this agent's client."""
        self.obnoxious_agent=Obnoxious_Agent(self.client)
        self.query_agent=Query_Agent(self.client)
        self.answering_agent = Answering_Agent(self.client, self.mode)
        self.relevance_agent = Relevant_Documents_Agent(self.client)
    
    def evaluate_mode(self, query):
        """Classify *query* into one of self.possible_modes via the LLM.

        NOTE(review): not called anywhere in this file — possibly dead code.
        """
        prompt=f'''Classify the following query to see if it most closely matches
          an item in this list {self.possible_modes}. 
          Your response MUST be a single word from that list only. Query: {query}'''
        # print(prompt)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        # print(response)
        return response.choices[0].message.content


    def main_loop(self):
        """Render chat history, then route one user turn through the pipeline."""
        # Default model recorded in session state; not read elsewhere in this
        # file — presumably for future use, TODO confirm.
        if "openai_model" not in st.session_state:
            st.session_state["openai_model"] = "gpt-3.5-turbo"

        if "messages" not in st.session_state:
            st.session_state.messages = []

        # Replay prior turns so the conversation survives Streamlit reruns.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])


        if prompt := st.chat_input("Hi, how can I help you?"):
            st.session_state.messages.append({"role": "user", "content": prompt})

            with st.chat_message("user"):
                st.markdown(prompt)
            
            # first check if prompt is obnoxious
            obnoxious=self.obnoxious_agent.check_query(prompt)
            print(obnoxious)
            if obnoxious=='obnoxious':
                with st.chat_message("assistant"):
                    response="Please refrain from obnoxious questions."
                    st.write(response)
            elif obnoxious=='gt':
                # 'gt' = general greeting label from Obnoxious_Agent.
                with st.chat_message("assistant"):
                    response="How can I assist you today?"
                    st.write(response)
            # elif obnoxious == 'other':
                #with st.chat_message("assistant"):
                #    response="I can only answer questions about machine learning!"
                #    st.write(response)  
            else:
                #next check if prompt if relevant
                docs, cosine_similarity = self.query_agent.query_vector_store(prompt)
                response=self.relevance_agent.get_relevance(prompt, docs, cosine_similarity)
                # make this be relevance agent

                if 'non-relevant' in response.lower():
                    # ML-classified queries still get an answer (without docs);
                    # everything else is refused.
                    if obnoxious == 'ml':
                        with st.chat_message("assistant"):
                            response = self.answering_agent.generate_response(prompt, '', st.session_state['messages'])
                            st.write(response)
                    else:
                        with st.chat_message("assistant"):
                            response="Please ask questions only related to Machine Learning!"
                            st.write(response)
                else:
                    with st.chat_message("assistant"):
                        response = self.answering_agent.generate_response(prompt, docs, st.session_state['messages'])
                        st.write(response)
            # Persist whichever assistant reply was produced above.
            st.session_state.messages.append({"role": "assistant", "content": response})


# Default chatbot mode on the first run of the script.
if "mode" not in st.session_state:
    st.session_state.mode = "Concise"

# The agent is constructed before the selectbox below renders, so the first
# run always uses the default mode.
head_agent=Head_Agent(st.session_state.mode)

def set_mode():
    # Rebuild only the answering agent; the other sub-agents don't use mode.
    # NOTE(review): on_change callbacks run before the assignment below is
    # re-executed, so this may read the previous selection — verify.
    head_agent.answering_agent = Answering_Agent(head_agent.client, st.session_state.mode)


st.session_state.mode = st.selectbox(
    'What kind of chatbot would you like today?',
    ('Concise', 'Chatty', 'Shakespearean'), on_change=set_mode)

st.write('You selected:', st.session_state.mode)


head_agent.main_loop()