xangma committed on
Commit
e6ae35c
·
1 Parent(s): ef6c37d
Files changed (2) hide show
  1. app.py +12 -4
  2. chain.py +34 -31
app.py CHANGED
@@ -100,13 +100,16 @@ def get_docs():
100
  documents.extend(text_splitter.split_documents(load))
101
  return documents
102
 
103
- def set_openai_api_key(api_key, model_selector, agent):
104
  if api_key:
105
  os.environ["OPENAI_API_KEY"] = api_key
106
  documents = get_docs()
107
  embeddings = OpenAIEmbeddings()
108
  vectorstore = CachedChroma.from_documents_with_cache(".persisted_data", documents, embedding=embeddings, collection_name="pycbc")
109
- qa_chain = get_new_chain1(vectorstore, model_selector)
 
 
 
110
  os.environ["OPENAI_API_KEY"] = ""
111
  return qa_chain
112
 
@@ -137,7 +140,7 @@ with block:
137
  lines=1,
138
  type="password",
139
  )
140
- model_selector = gr.Dropdown(["gpt-3.5-turbo", "gpt-4"], label="Model")
141
 
142
  chatbot = gr.Chatbot()
143
 
@@ -172,7 +175,12 @@ with block:
172
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
173
 
174
  openai_api_key_textbox.change(
175
- set_openai_api_key,
 
 
 
 
 
176
  inputs=[openai_api_key_textbox, model_selector, agent_state],
177
  outputs=[agent_state],
178
  )
 
100
  documents.extend(text_splitter.split_documents(load))
101
  return documents
102
 
103
+ def set_chain_up(api_key, model_selector, agent):
104
  if api_key:
105
  os.environ["OPENAI_API_KEY"] = api_key
106
  documents = get_docs()
107
  embeddings = OpenAIEmbeddings()
108
  vectorstore = CachedChroma.from_documents_with_cache(".persisted_data", documents, embedding=embeddings, collection_name="pycbc")
109
+ if model_selector:
110
+ qa_chain = get_new_chain1(vectorstore, model_selector)
111
+ else:
112
+ qa_chain = get_new_chain1(vectorstore, "gpt-3.5-turbo")
113
  os.environ["OPENAI_API_KEY"] = ""
114
  return qa_chain
115
 
 
140
  lines=1,
141
  type="password",
142
  )
143
+ model_selector = gr.Dropdown(["gpt-3.5-turbo", "gpt-4"], label="Model", show_label=True)
144
 
145
  chatbot = gr.Chatbot()
146
 
 
175
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
176
 
177
  openai_api_key_textbox.change(
178
+ set_chain_up,
179
+ inputs=[openai_api_key_textbox, model_selector, agent_state],
180
+ outputs=[agent_state],
181
+ )
182
+ model_selector.change(
183
+ set_chain_up,
184
  inputs=[openai_api_key_textbox, model_selector, agent_state],
185
  outputs=[agent_state],
186
  )
chain.py CHANGED
@@ -4,7 +4,6 @@ import pathlib
4
  from typing import Dict, List, Tuple
5
 
6
  from langchain import OpenAI, PromptTemplate
7
- from langchain.chains import LLMChain
8
  from langchain.chains.base import Chain
9
  from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
10
  from langchain.chains.question_answering import load_qa_chain
@@ -21,10 +20,23 @@ import langchain
21
  from langchain.vectorstores import Chroma
22
  from langchain.llms import OpenAI
23
  from langchain.chat_models import ChatOpenAI
24
- from langchain.chains.llm import LLMChain
25
  from langchain.chains.question_answering import load_qa_chain
26
  from langchain.prompts import PromptTemplate
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  from abc import ABC
29
  from typing import List, Optional, Any
30
 
@@ -55,7 +67,7 @@ class CustomChain(Chain, BaseModel):
55
  else:
56
  new_question = question
57
  print(new_question)
58
- docs = self.vstore.similarity_search(new_question, k=25)
59
  new_inputs = inputs.copy()
60
  new_inputs["question"] = new_question
61
  new_inputs["chat_history"] = chat_history_str
@@ -64,6 +76,7 @@ class CustomChain(Chain, BaseModel):
64
 
65
 
66
  def get_new_chain1(vectorstore, model_selector) -> Chain:
 
67
 
68
  _eg_template = """## Example:
69
 
@@ -71,11 +84,6 @@ def get_new_chain1(vectorstore, model_selector) -> Chain:
71
  {chat_history}
72
  Follow Up Input: {question}
73
  Standalone question: {answer}"""
74
- _eg_prompt = PromptTemplate(
75
- template=_eg_template,
76
- input_variables=["chat_history", "question", "answer"],
77
- )
78
-
79
  _prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
80
  _suffix = """## Example:
81
 
@@ -84,21 +92,6 @@ def get_new_chain1(vectorstore, model_selector) -> Chain:
84
  Follow Up Input: {question}
85
  Standalone question:"""
86
 
87
- example_selector = SemanticSimilarityExampleSelector(vectorstore=vectorstore, k=25)
88
- prompt = FewShotPromptTemplate(
89
- prefix=_prefix,
90
- suffix=_suffix,
91
- example_selector=example_selector,
92
- example_prompt=_eg_prompt,
93
- input_variables=["question", "chat_history"],
94
- )
95
- llm = ChatOpenAI(temperature=0, model_name=model_selector)
96
- key_word_extractor = LLMChain(llm=llm, prompt=prompt)
97
-
98
- EXAMPLE_PROMPT = PromptTemplate(
99
- template=">Example:\nContent:\n---------\n{page_content}\n----------\nSource: {source}",
100
- input_variables=["page_content", "source"],
101
- )
102
  template = """You are an AI assistant for the open source library PyCBC. The documentation is located at https://pycbc.readthedocs.io.
103
  You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
104
  You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
@@ -110,14 +103,24 @@ Question: {question}
110
  {context}
111
  =========
112
  Answer in Markdown:"""
113
- PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
114
- doc_chain = load_qa_chain(
115
- ChatOpenAI(temperature=0, model_name=model_selector, max_tokens=-1),
116
- chain_type="stuff",
117
- prompt=PROMPT,
118
- document_prompt=EXAMPLE_PROMPT,
119
- )
120
- return CustomChain(chain=doc_chain, vstore=vectorstore, key_word_extractor=key_word_extractor)
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  def _get_chat_history(chat_history: List[Tuple[str, str]]):
 
4
  from typing import Dict, List, Tuple
5
 
6
  from langchain import OpenAI, PromptTemplate
 
7
  from langchain.chains.base import Chain
8
  from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
9
  from langchain.chains.question_answering import load_qa_chain
 
20
  from langchain.vectorstores import Chroma
21
  from langchain.llms import OpenAI
22
  from langchain.chat_models import ChatOpenAI
 
23
  from langchain.chains.question_answering import load_qa_chain
24
  from langchain.prompts import PromptTemplate
25
 
26
+ import langchain
27
+ from langchain.callbacks.base import CallbackManager
28
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
29
+ # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
30
+ # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
31
+ from langchain.vectorstores import Chroma
32
+ from langchain.chat_models import ChatOpenAI
33
+ from langchain.chains import ConversationalRetrievalChain
34
+ from langchain.memory import ConversationBufferWindowMemory
35
+ from langchain.chains.llm import LLMChain
36
+ from langchain.callbacks.base import CallbackManager
37
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
38
+ from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
39
+
40
  from abc import ABC
41
  from typing import List, Optional, Any
42
 
 
67
  else:
68
  new_question = question
69
  print(new_question)
70
+ docs = self.vstore.similarity_search(new_question, k=12)
71
  new_inputs = inputs.copy()
72
  new_inputs["question"] = new_question
73
  new_inputs["chat_history"] = chat_history_str
 
76
 
77
 
78
  def get_new_chain1(vectorstore, model_selector) -> Chain:
79
+ max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
80
 
81
  _eg_template = """## Example:
82
 
 
84
  {chat_history}
85
  Follow Up Input: {question}
86
  Standalone question: {answer}"""
 
 
 
 
 
87
  _prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
88
  _suffix = """## Example:
89
 
 
92
  Follow Up Input: {question}
93
  Standalone question:"""
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  template = """You are an AI assistant for the open source library PyCBC. The documentation is located at https://pycbc.readthedocs.io.
96
  You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
97
  You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
 
103
  {context}
104
  =========
105
  Answer in Markdown:"""
106
+
107
+ # Construct a ChatVectorDBChain with a streaming llm for combine docs
108
+ # and a separate, non-streaming llm for question generation
109
+ llm = ChatOpenAI(client = None, temperature=0.7, model_name="gpt-4")
110
+ streaming_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name="gpt-4", max_tokens=1000)
111
+
112
+ question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
113
+ doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT)
114
+
115
+ # memory = ConversationKGMemory(llm=llm, input_key="question", output_key="answer")
116
+ memory = ConversationBufferWindowMemory(input_key="question", output_key="answer", k=5)
117
+ retriever = vectorstore.as_retriever()
118
+ retriever.search_kwargs = {"k": 25}
119
+ qa = ConversationalRetrievalChain(
120
+ retriever=retriever, memory=memory, combine_docs_chain=doc_chain, question_generator=question_generator)
121
+
122
+
123
+ return qa
124
 
125
 
126
  def _get_chat_history(chat_history: List[Tuple[str, str]]):