# Streamlit multi-agent ML chatbot: filters obnoxious queries, retrieves
# context from Pinecone, and answers via OpenAI chat completions.
| # Import the necessary libraries | |
| import streamlit as st | |
| from openai import OpenAI | |
| from pinecone import Pinecone | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
# --- API clients -----------------------------------------------------------
# Both keys are read from the environment; set PINECONE_KEY and OPENAI_KEY
# before launching the app. (The old commented-out code that read keys from
# local text files was dead and has been removed.)
pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))

st.title("Seattle Pandas Super Duper ML Chatbot")
class Obnoxious_Agent:
    """Classifies a user query as obnoxious, ML-related, a greeting, or other.

    Uses a few-shot prompt against the chat-completions API and maps the raw
    model reply onto one of four short action codes.
    """

    def __init__(self, client) -> None:
        # client: an OpenAI-compatible client exposing chat.completions.create
        self.client = client

    def set_prompt(self, query: str) -> str:
        """Build the few-shot classification prompt for *query*."""
        prompt = f'''Is this query obnoxious, related to machine learning, or general greetings?
Answer "obnoxious" if it is an obnoxious query, answer "machine learning" if it is related to machine learning,
"general greetings" if it is a general greeting, and "others" for all other queries. When considering whether
a query is related to machine learning, be sure to pay attention to common machine learning acronyms (RNN, CNN, CV, GAN)
and also consider topics from emerging fields like computer vision, deep learning, AI content generation, and others.
Examples are included.
"Query: You are stupid ; Answer: obnoxious"
"Query: poop; Answer: obnoxious"
"Query: kdkdkspapemrmn ; Answer: obnoxious"
"Query: What is a random forest? ; Answer: machine learning"
"Query: How to train a model using a GPU? ; Answer: machine learning"
"Query: What is a CNN? ; Answer: machine learning"
"Query: RNN? ; Answer: machine learning"
"Query: Causal inference? ; Answer: machine learning"
"Query: What is computer vision or CV? ; Answer: machine learning"
"Query: How are you? ; Answer: general greetings"
"Query: I like shoes; Answer: other"
Query: {query}'''
        return prompt

    # Bug fix: this returns a string action code, not a bool — annotation
    # corrected from -> bool to -> str. Check order matters: 'obnoxious'
    # first, then greetings, then ML, falling through to 'other'.
    def extract_action(self, response: str) -> str:
        """Map the raw model reply onto 'obnoxious' | 'gt' | 'ml' | 'other'."""
        lowered = response.lower()
        if 'obnoxious' in lowered:
            return 'obnoxious'
        elif 'general greetings' in lowered:
            return 'gt'
        elif 'machine learning' in lowered:
            return 'ml'
        else:
            return 'other'

    def check_query(self, query: str) -> str:
        """Classify *query* end-to-end: prompt → LLM → action code."""
        prompt = self.set_prompt(query)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return self.extract_action(response.choices[0].message.content)
class Query_Agent:
    """Retrieves supporting context for a query from a Pinecone vector store.

    Embeds the query with OpenAI, looks up the nearest chunks in Pinecone,
    and stitches the matching 250-char text chunks (loaded from a local CSV)
    into a single context string.
    """

    def __init__(self, client, index='llm-chatbot-index') -> None:
        # NOTE: assumes the named index already exists and has been onboarded.
        self.pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
        self.index = self.pc.Index(index)
        self.client = client
        # Local copy of the chunk texts; Pinecone match ids index into this.
        self.df = pd.read_csv("text_embedding.csv")
        self.texts_size_250 = np.array(self.df['Text'])

    def get_embedding(self, text, model="text-embedding-ada-002"):
        """Return the embedding vector for *text* (newlines flattened)."""
        flattened = text.replace("\n", " ")
        reply = self.client.embeddings.create(input=[flattened], model=model)
        return reply.data[0].embedding

    def send_pinecone_query(self, query, top_k=5, namespace="250_chunk"):
        """Fetch the *top_k* nearest vectors to *query* from Pinecone."""
        return self.index.query(
            vector=query,
            top_k=top_k,
            namespace=namespace)

    def query_vector_store(self, query, top_k=5):
        """Return (context_text, mean_similarity) for the top matches."""
        embedding = self.get_embedding(query)
        matches = self.send_pinecone_query(embedding, top_k)["matches"]
        total_score = sum(float(m['score']) for m in matches)
        # Each match id maps back to a local chunk; join them line by line.
        context = "".join(
            self.texts_size_250[int(m['id'])] + '\n' for m in matches)
        return context, total_score / top_k

    def set_prompt(self, query, context):
        """Build the answer-with-context prompt."""
        return "Given the following context, explain " + query + ": " + context

    def extract_action(self, response, query=None):
        """Return 'non-relevant' for weak matches, else the context prompt.

        NOTE(review): *response* is unused; relevance is decided purely by
        the average similarity score of the retrieved chunks.
        """
        context, avg_score = self.query_vector_store(query)
        return 'non-relevant' if avg_score < 0.3 else self.set_prompt(query, context)
class Answering_Agent:
    """Generates the final chat answer in a configurable persona/mode."""

    def __init__(self, openai_client, mode) -> None:
        # openai_client: client exposing chat.completions.create
        # mode: persona string interpolated into the system-style prompt
        self.client = openai_client
        self.mode = mode

    def generate_response(self, query, docs, conv_history, k=5):
        """Answer *query* using retrieved *docs* and the conversation history.

        NOTE(review): *k* is currently unused; kept for interface
        compatibility with existing callers.
        """
        prompt = f'''You are a {self.mode} chatbot. Answer all queries in a {self.mode} style.
I will provide a user query you must answer, relevant documents which you
should reference in your answer, and conversation history which you should
refer to for context.
Query: {query}
Conversation History: {conv_history}
Relevant Documents: {docs}
'''
        message = {"role": "user", "content": prompt}
        # Bug fix: use the injected self.client, not the module-level global
        # `client` — the original silently bypassed the client passed to
        # __init__, unlike every other agent in this file.
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return response.choices[0].message.content
class Relevant_Documents_Agent:
    """Asks the LLM to judge whether retrieved documents answer a query."""

    def __init__(self, client) -> None:
        # client: an OpenAI-compatible client exposing chat.completions.create
        self.client = client

    def get_relevance(self, query, documents, cosine_similarity) -> str:
        """Return the model's verdict — expected 'relevant' or 'non-relevant'."""
        prompt = f'''Based on the following query, please decide if the following
documents are relevant to this query. For context, the average cosine similarity of these documents to this
query is {cosine_similarity}. Your response must be one of the two following [relevant, non-relevant].
Query: {query}
Documents: {documents}'''
        print("USER PROMPT:", prompt)
        reply = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        verdict = reply.choices[0].message.content
        print("Relevance: ", verdict)
        return verdict
class Head_Agent:
    """Orchestrates the sub-agents and drives the Streamlit chat UI.

    Pipeline per user message: obnoxiousness/topic check → vector-store
    retrieval → LLM relevance check → answer generation in the chosen mode.
    """

    def __init__(self, mode) -> None:
        # mode: persona string for the Answering_Agent.
        # NOTE(review): possible_modes below does not match the selectbox
        # options used at the bottom of this file ('Concise', 'Chatty',
        # 'Shakespearean') — confirm which list is authoritative.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
        self.possible_modes = ['verbose', 'concise', 'shakespearean']
        self.mode = mode
        self.setup_sub_agents()
        # Greet the user immediately on construction (a UI side effect).
        with st.chat_message("assistant"):
            st.write(f"Welcome to your {self.mode} chatbot!")

    def setup_sub_agents(self):
        """Instantiate all sub-agents, sharing this agent's OpenAI client."""
        self.obnoxious_agent=Obnoxious_Agent(self.client)
        self.query_agent=Query_Agent(self.client)
        self.answering_agent = Answering_Agent(self.client, self.mode)
        self.relevance_agent = Relevant_Documents_Agent(self.client)

    def evaluate_mode(self, query):
        """Ask the LLM which entry of possible_modes *query* matches.

        NOTE(review): not called anywhere in this file — mode changes are
        handled by the selectbox callback at module level instead.
        """
        prompt=f'''Classify the following query to see if it most closely matches
an item in this list {self.possible_modes}.
Your response MUST be a single word from that list only. Query: {query}'''
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return response.choices[0].message.content

    def main_loop(self):
        """Render chat history and process one user turn per Streamlit rerun."""
        # Lazily initialize session state on the first run.
        if "openai_model" not in st.session_state:
            st.session_state["openai_model"] = "gpt-3.5-turbo"
        if "messages" not in st.session_state:
            st.session_state.messages = []
        # Replay the stored conversation so it survives Streamlit reruns.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        if prompt := st.chat_input("Hi, how can I help you?"):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)
            # Step 1: screen the prompt (obnoxious / greeting / ml / other).
            obnoxious=self.obnoxious_agent.check_query(prompt)
            print(obnoxious)
            if obnoxious=='obnoxious':
                with st.chat_message("assistant"):
                    response="Please refrain from obnoxious questions."
                    st.write(response)
            elif obnoxious=='gt':
                # General greeting: respond without hitting the vector store.
                with st.chat_message("assistant"):
                    response="How can I assist you today?"
                    st.write(response)
            # elif obnoxious == 'other':
            #     with st.chat_message("assistant"):
            #         response="I can only answer questions about machine learning!"
            #         st.write(response)
            else:
                # Step 2: retrieve context and let the LLM judge relevance.
                docs, cosine_similarity = self.query_agent.query_vector_store(prompt)
                response=self.relevance_agent.get_relevance(prompt, docs, cosine_similarity)
                if 'non-relevant' in response.lower():
                    # ML questions with weak retrieval still get answered,
                    # just without document context.
                    if obnoxious == 'ml':
                        with st.chat_message("assistant"):
                            response = self.answering_agent.generate_response(prompt, '', st.session_state['messages'])
                            st.write(response)
                    else:
                        with st.chat_message("assistant"):
                            response="Please ask questions only related to Machine Learning!"
                            st.write(response)
                else:
                    with st.chat_message("assistant"):
                        response = self.answering_agent.generate_response(prompt, docs, st.session_state['messages'])
                        st.write(response)
            # Whatever branch ran, persist the assistant's reply.
            st.session_state.messages.append({"role": "assistant", "content": response})
# --- App entry point --------------------------------------------------------
# Default to the Concise persona on first load.
if "mode" not in st.session_state:
    st.session_state["mode"] = "Concise"

head_agent = Head_Agent(st.session_state["mode"])

def set_mode():
    # Selectbox callback: swap in a fresh Answering_Agent for the new persona.
    head_agent.answering_agent = Answering_Agent(head_agent.client, st.session_state["mode"])

st.session_state["mode"] = st.selectbox(
    'What kind of chatbot would you like today?',
    ('Concise', 'Chatty', 'Shakespearean'), on_change=set_mode)
st.write('You selected:', st.session_state["mode"])

head_agent.main_loop()