Spaces:

HarBat
/

ChatBOT

Runtime error

App Files Files Community

HarshaBattula committed on Jun 5, 2023

Commit

a9e9e50

1 Parent(s): b91232a

adding gpt-3.5 based retrieval augmented system

Browse files

Files changed (10) hide show

app.py +52 -0
chain.py +70 -0
db/chroma-collections.parquet +3 -0
db/chroma-embeddings.parquet +3 -0
db/index/id_to_uuid_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl +3 -0
db/index/index_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.bin +3 -0
db/index/index_metadata_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl +3 -0
db/index/uuid_to_id_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl +3 -0
requirements.txt +7 -0
retriever.py +41 -0

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import openai
+from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from retriever import *
+from chain import *
+import gradio as gr
+def chatbot(query):
+    llm_response = qa_chain.run({"query": query})
+    return llm_response
+def load_embeddings_database_from_disk(persistence_directory, embeddings_generator):
+    """
+    Load a Chroma vector database from disk.
+    This function loads a Chroma vector database from the specified directory on disk.
+    It expects the same persistence_directory and embedding function as used when creating the database.
+    Args:
+        persistence_directory (str): The directory where the database is stored on disk.
+        embeddings_generator (obj): The embeddings generator function that was used when creating the database.
+    Returns:
+        vector_database (obj): The loaded Chroma vector database.
+    """
+    # Load the Chroma vector database from the persistence directory.
+    # The embedding_function parameter should be the same as the one used when the database was created.
+    vector_database = Chroma(persist_directory=persistence_directory, embedding_function=embeddings_generator)
+    return vector_database
+# Specify the directory where the database will be stored when it's persisted.
+persistence_directory = 'db'
+# Create and persist the embeddings for the documents.
+embeddings_generator = OpenAIEmbeddings(openai_api_key = openai.api_key)
+# Load the Chroma vector database from disk.
+vector_database = load_embeddings_database_from_disk(persistence_directory, embeddings_generator)
+topk_documents = 2
+# Creating the retriever on top documents.
+retriever = initialize_document_retriever(topk_documents, vector_database)
+qa_chain = create_question_answering_chain(retriever)
+inputs = gr.inputs.Textbox(lines=7, label="Coversational Interface with Chat history")
+outputs = gr.outputs.Textbox(label="Reply")
+gr.Interface(fn=chatbot, inputs=inputs, outputs=outputs, title="Retrieval Augmented Question Answering",
+             show_progress = True, theme="compact").launch(share = True, debug=True)

chain.py ADDED Viewed

	@@ -0,0 +1,70 @@

+from langchain.memory import ConversationBufferMemory
+from langchain import PromptTemplate
+from langchain.chat_models import ChatOpenAI
+from langchain.chains import RetrievalQA
+import openai
+openai.api_key = "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"
+def create_juniper_prompt_template():
+    template =  """You are a network engineer from Juniper Networks not a Language Model, use your knowledge, and the some pieces of context (delimited by <ctx></ctx>) to answer the user's question. \n Try to pretend as if you are a member of Juniper Networks.  \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.
+                  Do not indicate that you have access to any context.
+                  Use the chat history (delimited by <hs></hs>) to keep track of the conversation.
+                  \n----------------\n
+                  <ctx>
+                  {context}
+                  </ctx>
+                  \n----------------\n
+                  ------
+                  <hs>
+                  {history}
+                  </hs>
+                  ------
+                  {question}
+                  Answer:
+                """
+    juniper_prompt_template = PromptTemplate(input_variables=["history", "context", "question"], template=template)
+    return juniper_prompt_template
+def create_question_answering_chain(retriever):
+    """
+    Create a retrieval question answering (QA) chain.
+    This function initializes a QA chain that can be used to answer questions based on retrieved documents.
+    It uses the OpenAI 'gpt-3.5-turbo' model for the language model (LLM), and a document retriever for finding
+    relevant documents.
+    Args:
+        retriever (obj): The document retriever to use for finding relevant documents.
+    Returns:
+        qa_chain (obj): The initialized retrieval QA chain.
+    """
+    # Initialize the OpenAI language model with specified temperature, model name, and API key.
+    turbo_llm = ChatOpenAI(
+        temperature=0,
+        model_name='gpt-3.5-turbo',
+        openai_api_key = openai.api_key
+    )
+    # Initialize the retrieval QA chain with the language model, chain type, document retriever,
+    # and a flag indicating whether to return source documents.
+    qa_chain = RetrievalQA.from_chain_type(
+          llm=turbo_llm,
+          chain_type='stuff',
+          retriever=retriever,
+          verbose=False,
+          chain_type_kwargs={
+              "verbose": False,
+              "prompt": create_juniper_prompt_template(),
+              "memory": ConversationBufferMemory(
+                  memory_key="history",
+                  input_key="question")
+          }
+      )
+    return qa_chain

db/chroma-collections.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d11f275e47d9d5a2bb0acb41c5746868e7288b0436871abe793fbd1679064d5e
+size 557

db/chroma-embeddings.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9eac62ca72d3b72a738519d1fb159052c35a8b43284974729b737525c88d920c
+size 244539180

db/index/id_to_uuid_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:874a7a333254e6fc9fc426e74068ae585acad068eb18f40ad8781b206f30a778
+size 641398

db/index/index_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eff35d3c91148d593e15d8d18684b1841f5008db2bd664418736fec5b93f2531
+size 124197200

db/index/index_metadata_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1da962bf05d46a551c527af9a099dbbcddd739579e064b36fed531e14faeb5dc
+size 105

db/index/uuid_to_id_d25f8acb-f4d6-4b67-b80a-9b85ac72b87c.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:878e7943e4bbdd6b6b5963aa63441f165dce733a0d6a3304e839ce2231f63246
+size 749904

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+langchain
+openai
+tiktoken
+chromadb
+pypdf
+gradio

retriever.py ADDED Viewed

	@@ -0,0 +1,41 @@

+def initialize_document_retriever(top_k_documents, vector_database):
+    """
+    Initialize a document retriever using a Chroma vector database.
+    This function initializes a document retriever that can be used to find and retrieve the most relevant documents
+    for a specified search query. The number of documents to retrieve is determined by the top_k_documents parameter.
+    Args:
+        top_k_documents (int): The number of top relevant documents to retrieve.
+        vector_database (obj): The Chroma vector database to use for retrieving documents.
+    Returns:
+        document_retriever (obj): The initialized document retriever.
+    """
+    # Initialize the document retriever with the Chroma vector database and the number of documents to retrieve.
+    document_retriever = vector_database.as_retriever(
+        search_kwargs = {"k": top_k_documents}
+    )
+    return document_retriever
+def retrieve_relevant_documents(search_query, document_retriever):
+    """
+    Retrieve the most relevant documents for a given query.
+    This function uses an initialized document retriever to find and retrieve the most relevant documents
+    for a specified search query.
+    Args:
+        search_query (str): The search query for which to find and retrieve relevant documents.
+        document_retriever (obj): The initialized document retriever.
+    Returns:
+        relevant_documents (list): The list of most relevant documents for the search query.
+    """
+    # Retrieve the most relevant documents for the search query using the document retriever.
+    relevant_documents = document_retriever.get_relevant_documents(search_query)
+    return relevant_documents