Spaces:

araeyn
/

schoolQuest

Runtime error

App Files Files Community

araeyn committed on Aug 15, 2024

Commit

a23b8d7

verified ·

1 Parent(s): 2e7f253

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -110

app.py CHANGED Viewed

@@ -47,120 +47,117 @@ async def echo(websocket):
 async def main():
     async with serve(echo, "0.0.0.0", 7860):
         await asyncio.Future()
-def g():
-    global retriever, conversational_rag_chain
-    if not os.path.isdir('database'):
-        os.system("unzip database.zip")
-    loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
-    documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    splits = text_splitter.split_documents(documents)
-    print()
-    print("-------")
-    print("TextSplitter, DirectoryLoader")
-    print("-------")
-    persist_directory = 'db'
-    # embedding = HuggingFaceInferenceAPIEmbeddings(api_key=os.environ["HUGGINGFACE_API_KEY"], model=)
-    model_name = "BAAI/bge-large-en"
-    model_kwargs = {'device': 'cpu'}
-    encode_kwargs = {'normalize_embeddings': True}
-    embedding = HuggingFaceBgeEmbeddings(
-        model_name=model_name,
-        model_kwargs=model_kwargs,
-        encode_kwargs=encode_kwargs,
-        show_progress=True,
-    )
-    print()
-    print("-------")
-    print("Embeddings")
-    print("-------")
-    vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
-    def format_docs(docs):
-        return "\n\n".join(doc.page_content for doc in docs)
-    retriever = vectorstore.as_retriever()
-    prompt = hub.pull("rlm/rag-prompt")
-    llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
-    rag_chain = (
-        {"context": retriever | format_docs, "question": RunnablePassthrough()}
-        | prompt
-        | llm
-        | StrOutputParser()
-    )
-    print()
-    print("-------")
-    print("Retriever, Prompt, LLM, Rag_Chain")
-    print("-------")
-    ### Contextualize question ###
-    contextualize_q_system_prompt = """Given a chat history and the latest user question \
-    which might reference context in the chat history, formulate a standalone question \
-    which can be understood without the chat history. Do NOT answer the question, \
-    just reformulate it if needed and otherwise return it as is."""
-    contextualize_q_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", contextualize_q_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ]
-    )
-    history_aware_retriever = create_history_aware_retriever(
-        llm, retriever, contextualize_q_prompt
-    )
-    ### Answer question ###
-    qa_system_prompt = """You are an assistant for question-answering tasks. \
-    Use the following pieces of retrieved context to answer the question. \
-    If you don't know the answer, just say that you don't know. \
-    Use three sentences maximum and keep the answer concise.\
-    {context}"""
-    qa_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", qa_system_prompt),
-            MessagesPlaceholder("chat_history"),
-            ("human", "{input}"),
-        ]
-    )
-    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-    ### Statefully manage chat history ###
-    store = {}
-    def get_session_history(session_id: str) -> BaseChatMessageHistory:
-        if session_id not in store:
-            store[session_id] = ChatMessageHistory()
-        return store[session_id]
-    conversational_rag_chain = RunnableWithMessageHistory(
-        rag_chain,
-        get_session_history,
-        input_messages_key="input",
-        history_messages_key="chat_history",
-        output_messages_key="answer",
-    )
 def f():
     asyncio.run(main())
 Process(target=f).start()
-Process(target=g).start()
 """
 websocket
 streamlit app ~> backend

 async def main():
     async with serve(echo, "0.0.0.0", 7860):
         await asyncio.Future()
 def f():
     asyncio.run(main())
 Process(target=f).start()
+if not os.path.isdir('database'):
+    os.system("unzip database.zip")
+loader = DirectoryLoader('./database', glob="./*.txt", loader_cls=TextLoader)
+documents = loader.load()
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+splits = text_splitter.split_documents(documents)
+print()
+print("-------")
+print("TextSplitter, DirectoryLoader")
+print("-------")
+persist_directory = 'db'
+# embedding = HuggingFaceInferenceAPIEmbeddings(api_key=os.environ["HUGGINGFACE_API_KEY"], model=)
+model_name = "BAAI/bge-large-en"
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': True}
+embedding = HuggingFaceBgeEmbeddings(
+    model_name=model_name,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs,
+    show_progress=True,
+)
+print()
+print("-------")
+print("Embeddings")
+print("-------")
+vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
+retriever = vectorstore.as_retriever()
+prompt = hub.pull("rlm/rag-prompt")
+llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
+rag_chain = (
+    {"context": retriever | format_docs, "question": RunnablePassthrough()}
+    | prompt
+    | llm
+    | StrOutputParser()
+)
+print()
+print("-------")
+print("Retriever, Prompt, LLM, Rag_Chain")
+print("-------")
+### Contextualize question ###
+contextualize_q_system_prompt = """Given a chat history and the latest user question \
+which might reference context in the chat history, formulate a standalone question \
+which can be understood without the chat history. Do NOT answer the question, \
+just reformulate it if needed and otherwise return it as is."""
+contextualize_q_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", contextualize_q_system_prompt),
+        MessagesPlaceholder("chat_history"),
+        ("human", "{input}"),
+    ]
+)
+history_aware_retriever = create_history_aware_retriever(
+    llm, retriever, contextualize_q_prompt
+)
+### Answer question ###
+qa_system_prompt = """You are an assistant for question-answering tasks. \
+Use the following pieces of retrieved context to answer the question. \
+If you don't know the answer, just say that you don't know. \
+Use three sentences maximum and keep the answer concise.\
+{context}"""
+qa_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", qa_system_prompt),
+        MessagesPlaceholder("chat_history"),
+        ("human", "{input}"),
+    ]
+)
+question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+### Statefully manage chat history ###
+store = {}
+def get_session_history(session_id: str) -> BaseChatMessageHistory:
+    if session_id not in store:
+        store[session_id] = ChatMessageHistory()
+    return store[session_id]
+conversational_rag_chain = RunnableWithMessageHistory(
+    rag_chain,
+    get_session_history,
+    input_messages_key="input",
+    history_messages_key="chat_history",
+    output_messages_key="answer",
+)
 """
 websocket
 streamlit app ~> backend