Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,6 +22,8 @@ from langchain_community.chat_message_histories import ChatMessageHistory
| 22 | if not os.path.isdir('database'):
| 23 |     os.system("unzip database.zip")
| 24 |
| 25 | loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
| 26 |
| 27 | documents = loader.load()
|
|
@@ -29,10 +31,22 @@ documents = loader.load()
| 29 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
| 30 | texts = text_splitter.split_documents(documents)
| 31 |
| 32 | persist_directory = 'db'
| 33 |
| 34 | embedding = HuggingFaceEmbeddings()
| 35 |
| 36 | vectordb = Chroma.from_documents(documents=texts,
| 37 |                                  embedding=embedding,
| 38 |                                  persist_directory=persist_directory)
|
|
@@ -40,9 +54,21 @@ vectordb = Chroma.from_documents(documents=texts,
| 40 | vectordb.persist()
| 41 | vectordb = None
| 42 |
| 43 | vectordb = Chroma(persist_directory=persist_directory,
| 44 |                   embedding_function=embedding)
| 45 |
| 46 | def format_docs(docs):
| 47 |     return "\n\n".join(doc.page_content for doc in docs)
| 48 |
|
|
@@ -56,6 +82,12 @@ rag_chain = (
| 56 |     | StrOutputParser()
| 57 | )
| 58 |
| 59 | contextualize_q_system_prompt = """Given a chat history and the latest user question \
| 60 | which might reference context in the chat history, formulate a standalone question \
| 61 | which can be understood without the chat history. Do NOT answer the question, \
|
|
|
|
| 22 |   if not os.path.isdir('database'):
| 23 |       os.system("unzip database.zip")
| 24 |
| 25 | + clean_up_tokenization_spaces = True
| 26 | +
| 27 |   loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
| 28 |
| 29 |   documents = loader.load()
|
|
|
|
| 31 |   text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
| 32 |   texts = text_splitter.split_documents(documents)
| 33 |
| 34 | + print()
| 35 | + print("-------")
| 36 | + print("TextSplitter, DirectoryLoader")
| 37 | + print("-------")
| 38 | + print("--")
| 39 | +
| 40 |   persist_directory = 'db'
| 41 |
| 42 |   embedding = HuggingFaceEmbeddings()
| 43 |
| 44 | + print()
| 45 | + print("-------")
| 46 | + print("Embeddings")
| 47 | + print("-------")
| 48 | + print("--")
| 49 | +
| 50 |   vectordb = Chroma.from_documents(documents=texts,
| 51 |                                    embedding=embedding,
| 52 |                                    persist_directory=persist_directory)
|
|
|
|
| 54 |   vectordb.persist()
| 55 |   vectordb = None
| 56 |
| 57 | + print()
| 58 | + print("-------")
| 59 | + print("Chroma1")
| 60 | + print("-------")
| 61 | + print("--")
| 62 | +
| 63 |   vectordb = Chroma(persist_directory=persist_directory,
| 64 |                     embedding_function=embedding)
| 65 |
| 66 | + print()
| 67 | + print("-------")
| 68 | + print("Chroma2")
| 69 | + print("-------")
| 70 | + print("--")
| 71 | +
| 72 |   def format_docs(docs):
| 73 |       return "\n\n".join(doc.page_content for doc in docs)
| 74 |
|
|
|
|
| 82 |       | StrOutputParser()
| 83 |   )
| 84 |
| 85 | + print()
| 86 | + print("-------")
| 87 | + print("Retriever, Prompt, LLM, Rag_Chain")
| 88 | + print("-------")
| 89 | + print("--")
| 90 | +
| 91 |   contextualize_q_system_prompt = """Given a chat history and the latest user question \
| 92 |   which might reference context in the chat history, formulate a standalone question \
| 93 |   which can be understood without the chat history. Do NOT answer the question, \
|