xangma committed on
Commit
e6ae35c
·
1 Parent(s): ef6c37d
Files changed (2) hide show
  1. app.py +12 -4
  2. chain.py +34 -31
app.py CHANGED
@@ -100,13 +100,16 @@ def get_docs():
100
  documents.extend(text_splitter.split_documents(load))
101
  return documents
102
 
103
- def set_openai_api_key(api_key, model_selector, agent):
104
  if api_key:
105
  os.environ["OPENAI_API_KEY"] = api_key
106
  documents = get_docs()
107
  embeddings = OpenAIEmbeddings()
108
  vectorstore = CachedChroma.from_documents_with_cache(".persisted_data", documents, embedding=embeddings, collection_name="pycbc")
109
- qa_chain = get_new_chain1(vectorstore, model_selector)
 
 
 
110
  os.environ["OPENAI_API_KEY"] = ""
111
  return qa_chain
112
 
@@ -137,7 +140,7 @@ with block:
137
  lines=1,
138
  type="password",
139
  )
140
- model_selector = gr.Dropdown(["gpt-3.5-turbo", "gpt-4"], label="Model")
141
 
142
  chatbot = gr.Chatbot()
143
 
@@ -172,7 +175,12 @@ with block:
172
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
173
 
174
  openai_api_key_textbox.change(
175
- set_openai_api_key,
 
 
 
 
 
176
  inputs=[openai_api_key_textbox, model_selector, agent_state],
177
  outputs=[agent_state],
178
  )
 
100
  documents.extend(text_splitter.split_documents(load))
101
  return documents
102
 
103
+ def set_chain_up(api_key, model_selector, agent):
104
  if api_key:
105
  os.environ["OPENAI_API_KEY"] = api_key
106
  documents = get_docs()
107
  embeddings = OpenAIEmbeddings()
108
  vectorstore = CachedChroma.from_documents_with_cache(".persisted_data", documents, embedding=embeddings, collection_name="pycbc")
109
+ if model_selector:
110
+ qa_chain = get_new_chain1(vectorstore, model_selector)
111
+ else:
112
+ qa_chain = get_new_chain1(vectorstore, "gpt-3.5-turbo")
113
  os.environ["OPENAI_API_KEY"] = ""
114
  return qa_chain
115
 
 
140
  lines=1,
141
  type="password",
142
  )
143
+ model_selector = gr.Dropdown(["gpt-3.5-turbo", "gpt-4"], label="Model", show_label=True)
144
 
145
  chatbot = gr.Chatbot()
146
 
 
175
  message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
176
 
177
  openai_api_key_textbox.change(
178
+ set_chain_up,
179
+ inputs=[openai_api_key_textbox, model_selector, agent_state],
180
+ outputs=[agent_state],
181
+ )
182
+ model_selector.change(
183
+ set_chain_up,
184
  inputs=[openai_api_key_textbox, model_selector, agent_state],
185
  outputs=[agent_state],
186
  )
chain.py CHANGED
@@ -4,7 +4,6 @@ import pathlib
4
  from typing import Dict, List, Tuple
5
 
6
  from langchain import OpenAI, PromptTemplate
7
- from langchain.chains import LLMChain
8
  from langchain.chains.base import Chain
9
  from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
10
  from langchain.chains.question_answering import load_qa_chain
@@ -21,10 +20,23 @@ import langchain
21
  from langchain.vectorstores import Chroma
22
  from langchain.llms import OpenAI
23
  from langchain.chat_models import ChatOpenAI
24
- from langchain.chains.llm import LLMChain
25
  from langchain.chains.question_answering import load_qa_chain
26
  from langchain.prompts import PromptTemplate
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  from abc import ABC
29
  from typing import List, Optional, Any
30
 
@@ -55,7 +67,7 @@ class CustomChain(Chain, BaseModel):
55
  else:
56
  new_question = question
57
  print(new_question)
58
- docs = self.vstore.similarity_search(new_question, k=25)
59
  new_inputs = inputs.copy()
60
  new_inputs["question"] = new_question
61
  new_inputs["chat_history"] = chat_history_str
@@ -64,6 +76,7 @@ class CustomChain(Chain, BaseModel):
64
 
65
 
66
  def get_new_chain1(vectorstore, model_selector) -> Chain:
 
67
 
68
  _eg_template = """## Example:
69
 
@@ -71,11 +84,6 @@ def get_new_chain1(vectorstore, model_selector) -> Chain:
71
  {chat_history}
72
  Follow Up Input: {question}
73
  Standalone question: {answer}"""
74
- _eg_prompt = PromptTemplate(
75
- template=_eg_template,
76
- input_variables=["chat_history", "question", "answer"],
77
- )
78
-
79
  _prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
80
  _suffix = """## Example:
81
 
@@ -84,21 +92,6 @@ def get_new_chain1(vectorstore, model_selector) -> Chain:
84
  Follow Up Input: {question}
85
  Standalone question:"""
86
 
87
- example_selector = SemanticSimilarityExampleSelector(vectorstore=vectorstore, k=25)
88
- prompt = FewShotPromptTemplate(
89
- prefix=_prefix,
90
- suffix=_suffix,
91
- example_selector=example_selector,
92
- example_prompt=_eg_prompt,
93
- input_variables=["question", "chat_history"],
94
- )
95
- llm = ChatOpenAI(temperature=0, model_name=model_selector)
96
- key_word_extractor = LLMChain(llm=llm, prompt=prompt)
97
-
98
- EXAMPLE_PROMPT = PromptTemplate(
99
- template=">Example:\nContent:\n---------\n{page_content}\n----------\nSource: {source}",
100
- input_variables=["page_content", "source"],
101
- )
102
  template = """You are an AI assistant for the open source library PyCBC. The documentation is located at https://pycbc.readthedocs.io.
103
  You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
104
  You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
@@ -110,14 +103,24 @@ Question: {question}
110
  {context}
111
  =========
112
  Answer in Markdown:"""
113
- PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
114
- doc_chain = load_qa_chain(
115
- ChatOpenAI(temperature=0, model_name=model_selector, max_tokens=-1),
116
- chain_type="stuff",
117
- prompt=PROMPT,
118
- document_prompt=EXAMPLE_PROMPT,
119
- )
120
- return CustomChain(chain=doc_chain, vstore=vectorstore, key_word_extractor=key_word_extractor)
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  def _get_chat_history(chat_history: List[Tuple[str, str]]):
 
4
  from typing import Dict, List, Tuple
5
 
6
  from langchain import OpenAI, PromptTemplate
 
7
  from langchain.chains.base import Chain
8
  from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
9
  from langchain.chains.question_answering import load_qa_chain
 
20
  from langchain.vectorstores import Chroma
21
  from langchain.llms import OpenAI
22
  from langchain.chat_models import ChatOpenAI
 
23
  from langchain.chains.question_answering import load_qa_chain
24
  from langchain.prompts import PromptTemplate
25
 
26
+ import langchain
27
+ from langchain.callbacks.base import CallbackManager
28
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
29
+ # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
30
+ # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
31
+ from langchain.vectorstores import Chroma
32
+ from langchain.chat_models import ChatOpenAI
33
+ from langchain.chains import ConversationalRetrievalChain
34
+ from langchain.memory import ConversationBufferWindowMemory
35
+ from langchain.chains.llm import LLMChain
36
+ from langchain.callbacks.base import CallbackManager
37
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
38
+ from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
39
+
40
  from abc import ABC
41
  from typing import List, Optional, Any
42
 
 
67
  else:
68
  new_question = question
69
  print(new_question)
70
+ docs = self.vstore.similarity_search(new_question, k=12)
71
  new_inputs = inputs.copy()
72
  new_inputs["question"] = new_question
73
  new_inputs["chat_history"] = chat_history_str
 
76
 
77
 
78
  def get_new_chain1(vectorstore, model_selector) -> Chain:
79
+ max_tokens_dict = {'gpt-4': 2000, 'gpt-3.5-turbo': 1000}
80
 
81
  _eg_template = """## Example:
82
 
 
84
  {chat_history}
85
  Follow Up Input: {question}
86
  Standalone question: {answer}"""
 
 
 
 
 
87
  _prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to PyCBC."""
88
  _suffix = """## Example:
89
 
 
92
  Follow Up Input: {question}
93
  Standalone question:"""
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  template = """You are an AI assistant for the open source library PyCBC. The documentation is located at https://pycbc.readthedocs.io.
96
  You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
97
  You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
 
103
  {context}
104
  =========
105
  Answer in Markdown:"""
106
+
107
+ # Construct a ChatVectorDBChain with a streaming llm for combine docs
108
+ # and a separate, non-streaming llm for question generation
109
+ llm = ChatOpenAI(client = None, temperature=0.7, model_name="gpt-4")
110
+ streaming_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name="gpt-4", max_tokens=1000)
111
+
112
+ question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
113
+ doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT)
114
+
115
+ # memory = ConversationKGMemory(llm=llm, input_key="question", output_key="answer")
116
+ memory = ConversationBufferWindowMemory(input_key="question", output_key="answer", k=5)
117
+ retriever = vectorstore.as_retriever()
118
+ retriever.search_kwargs = {"k": 25}
119
+ qa = ConversationalRetrievalChain(
120
+ retriever=retriever, memory=memory, combine_docs_chain=doc_chain, question_generator=question_generator)
121
+
122
+
123
+ return qa
124
 
125
 
126
  def _get_chat_history(chat_history: List[Tuple[str, str]]):