peichao.dong committed on
Commit ·
2a0c033
1
Parent(s): 066c6cf
update embedding
Browse files- app.py +4 -3
- documents/abstract.faiss/index.faiss +0 -0
- documents/abstract.faiss/index.pkl +3 -0
- embedding.py +41 -10
app.py
CHANGED
|
@@ -50,12 +50,13 @@ def feedBack(context, story, chatbot=[], input=""):
|
|
| 50 |
|
| 51 |
customerEmbedding = CustomEmbedding()
|
| 52 |
|
| 53 |
-
faqChain = customerEmbedding.
|
| 54 |
|
| 55 |
code_agent_executor = code_agent_executor()
|
| 56 |
def faqFromLocal(input, chatbot=[]):
|
| 57 |
-
response = faqChain({"question": f"{input}"})
|
| 58 |
-
|
|
|
|
| 59 |
return chatbot, ""
|
| 60 |
|
| 61 |
|
|
|
|
| 50 |
|
| 51 |
customerEmbedding = CustomEmbedding()

# Conversational FAQ agent used by faqFromLocal below.
faqChain = customerEmbedding.getFAQAgent()

# NOTE(review): this rebinds the factory name `code_agent_executor` to its own
# return value, shadowing the function — calling it a second time would fail.
# Consider a distinct name for the instance; verify no later code re-invokes
# the factory before renaming.
code_agent_executor = code_agent_executor()
| 56 |
def faqFromLocal(input, chatbot=None):
    """Answer a local FAQ question and append the exchange to the chat history.

    Args:
        input: The user's question text.
        chatbot: Optional list of (question, answer) pairs; a fresh list is
            created when omitted.

    Returns:
        Tuple of (updated chat history, "" to clear the input box).
    """
    # Bug fix: the previous mutable default (chatbot=[]) was shared across
    # calls, leaking conversation history between sessions. Use the None
    # sentinel and create a fresh list per call.
    if chatbot is None:
        chatbot = []
    response = faqChain.run(input)
    chatbot.append((input, response))
    return chatbot, ""
|
| 61 |
|
| 62 |
|
documents/abstract.faiss/index.faiss
ADDED
|
Binary file (58.4 kB). View file
|
|
|
documents/abstract.faiss/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65b241ca9d637fc607f43c0190c682677b635dbd36cddb0b754c0f74ea6988da
|
| 3 |
+
size 26724
|
embedding.py
CHANGED
|
@@ -9,34 +9,35 @@ from langchain.chains.question_answering import load_qa_chain
|
|
| 9 |
from langchain.document_loaders import NotionDirectoryLoader
|
| 10 |
from langchain.memory import ConversationBufferMemory
|
| 11 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
|
| 12 |
|
| 13 |
from models import llm
|
| 14 |
|
| 15 |
|
| 16 |
class CustomEmbedding:
|
| 17 |
notionDirectoryLoader = NotionDirectoryLoader(
|
| 18 |
-
"
|
| 19 |
embeddings = HuggingFaceEmbeddings()
|
| 20 |
|
| 21 |
def calculateEmbedding(self):
|
| 22 |
documents = self.notionDirectoryLoader.load()
|
| 23 |
-
text_splitter = SpacyTextSplitter(
|
| 24 |
-
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
texts = text_splitter.split_documents(documents)
|
| 29 |
|
| 30 |
docsearch = FAISS.from_documents(texts, self.embeddings)
|
| 31 |
docsearch.save_local(
|
| 32 |
-
folder_path="./documents/
|
| 33 |
|
| 34 |
|
| 35 |
|
| 36 |
def getFAQChain(self, llm=llm(temperature=0.7)):
|
| 37 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 38 |
docsearch = FAISS.load_local(
|
| 39 |
-
"./documents/
|
| 40 |
# retriever = VectorStoreRetriever(vectorstore=docsearch)
|
| 41 |
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
|
| 42 |
|
|
@@ -48,15 +49,45 @@ class CustomEmbedding:
|
|
| 48 |
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
| 49 |
|
| 50 |
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
|
| 51 |
-
qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(),
|
| 52 |
question_generator=question_generator,
|
| 53 |
combine_docs_chain=doc_chain,
|
| 54 |
memory=memory)
|
| 55 |
return qa
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# # customerEmbedding.calculateNotionEmbedding()
|
| 61 |
|
| 62 |
# faq_chain = customerEmbedding.getFAQChain()
|
|
|
|
| 9 |
from langchain.document_loaders import NotionDirectoryLoader
|
| 10 |
from langchain.memory import ConversationBufferMemory
|
| 11 |
from langchain.chains import ConversationalRetrievalChain
|
| 12 |
+
from langchain.agents import initialize_agent, AgentType, Tool, ZeroShotAgent, AgentExecutor
|
| 13 |
|
| 14 |
from models import llm
|
| 15 |
|
| 16 |
|
| 17 |
class CustomEmbedding:
    """Builds and queries a FAISS vector index over Notion-exported docs."""

    # NOTE(review): hard-coded absolute path to one developer's machine — this
    # breaks on any other host. Consider an environment variable or a
    # repo-relative path; confirm the intended deployment layout.
    notionDirectoryLoader = NotionDirectoryLoader(
        "/Users/peichao.dong/Documents/projects/dpc/ABstract/docs/pages")
    # Default HuggingFace sentence-transformers embedding model.
    embeddings = HuggingFaceEmbeddings()
|
| 21 |
|
| 22 |
def calculateEmbedding(self):
|
| 23 |
documents = self.notionDirectoryLoader.load()
|
| 24 |
+
# text_splitter = SpacyTextSplitter(
|
| 25 |
+
# chunk_size=2048, pipeline="zh_core_web_sm", chunk_overlap=0)
|
| 26 |
|
| 27 |
+
text_splitter = MarkdownTextSplitter(
|
| 28 |
+
chunk_size=2048, chunk_overlap=0)
|
| 29 |
texts = text_splitter.split_documents(documents)
|
| 30 |
|
| 31 |
docsearch = FAISS.from_documents(texts, self.embeddings)
|
| 32 |
docsearch.save_local(
|
| 33 |
+
folder_path="./documents/abstract.faiss")
|
| 34 |
|
| 35 |
|
| 36 |
|
| 37 |
def getFAQChain(self, llm=llm(temperature=0.7)):
|
| 38 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 39 |
docsearch = FAISS.load_local(
|
| 40 |
+
"./documents/abstract.faiss", self.embeddings)
|
| 41 |
# retriever = VectorStoreRetriever(vectorstore=docsearch)
|
| 42 |
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
|
| 43 |
|
|
|
|
| 49 |
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
| 50 |
|
| 51 |
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
|
| 52 |
+
qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(search_kwargs={"k": 1}),
|
| 53 |
question_generator=question_generator,
|
| 54 |
combine_docs_chain=doc_chain,
|
| 55 |
memory=memory)
|
| 56 |
return qa
|
| 57 |
|
| 58 |
+
def faq(self, input):
|
| 59 |
+
qa = self.getFAQChain()
|
| 60 |
+
response = qa({"question": f"{input}"})
|
| 61 |
+
return response["answer"]
|
| 62 |
|
| 63 |
+
def getFAQAgent(self):
|
| 64 |
+
tools = [Tool(name="ABstract system FAQ", func= self.faq, description="Useful for anwer questions about ABstract system")]
|
| 65 |
+
memory = ConversationBufferMemory(memory_key="chat_history")
|
| 66 |
+
|
| 67 |
+
prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""
|
| 68 |
+
suffix = """Begin!"
|
| 69 |
+
|
| 70 |
+
{chat_history}
|
| 71 |
+
Question: {input}
|
| 72 |
+
{agent_scratchpad}"""
|
| 73 |
+
|
| 74 |
+
prompt = ZeroShotAgent.create_prompt(
|
| 75 |
+
tools,
|
| 76 |
+
prefix=prefix,
|
| 77 |
+
suffix=suffix,
|
| 78 |
+
input_variables=["input", "chat_history", "agent_scratchpad"]
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
llm_chain = LLMChain(llm=llm(), prompt=prompt)
|
| 82 |
+
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
|
| 83 |
+
faq_agent = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
|
| 84 |
+
return faq_agent
|
| 85 |
+
# faq_agent = initialize_agent(tools= tools, llm=llm(), agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
if __name__ == "__main__":
    # Rebuild the local FAISS index from the Notion export when run as a script.
    embedder = CustomEmbedding()
    embedder.calculateEmbedding()