Update app.py
app.py CHANGED
@@ -21,12 +21,16 @@ import tqdm
 import accelerate
 
 
-#
+#Set parameters
 
-
-
+llm_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
+list_file_path = '/home/niti/something'
+chunk_size = 1024
+chunk_overlap = 128
+temperature = 0.1
+max_tokens = 6000
+top_k = 3
 
-# Load PDF document and create doc splits
 def load_doc(list_file_path, chunk_size, chunk_overlap):
     # Processing for one document only
     # loader = PyPDFLoader(file_path)
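
Note: this hunk replaces per-call parameters with module-level settings that the rest of the file reads as globals. For readers following along, a minimal sketch of how chunk_size and chunk_overlap are typically consumed by load_doc; the use of RecursiveCharacterTextSplitter is an assumption, since load_doc's body is outside this hunk:

    # Hypothetical sketch -- not part of the commit; load_doc's internals are not shown in this diff.
    from langchain.document_loaders import PyPDFLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    def split_pdfs(paths, chunk_size=1024, chunk_overlap=128):
        pages = []
        for path in paths:
            pages.extend(PyPDFLoader(path).load())  # one Document per PDF page
        splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                                  chunk_overlap=chunk_overlap)
        return splitter.split_documents(pages)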

@@ -55,7 +59,6 @@ def create_db(splits, collection_name):
     )
     return vectordb
 
-
 # Load vector database
 def load_db():
     embedding = HuggingFaceEmbeddings()
@@ -64,99 +67,38 @@ def load_db():
         embedding_function=embedding)
     return vectordb
 
-
 # Initialize langchain LLM chain
-def initialize_llmchain(
-        llm_model, temperature,
-        max_tokens, top_k,
-        vector_db,
-        progress=gr.Progress()):
+def initialize_llmchain(vector_db):
+    llm = HuggingFaceHub(repo_id = llm_model,
+                         model_kwargs={"temperature": temperature,
+                                       "max_new_tokens": max_tokens,
+                                       "top_k": top_k,
+                                       "load_in_8bit": True})
 
-
-    # Warning: langchain issue
-    # URL: https://github.com/langchain-ai/langchain/issues/6080
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-        )
-    elif llm_model == "microsoft/phi-2":
-        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-        )
-    else:
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-        )
-
-    progress(0.75, desc="Defining buffer memory...")
-    memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        output_key='answer',
-        return_messages=True
-    )
-    # retriever=vector_db.as_retriever(search_type="similarity", search_kwargs={'k': 3})
+    memory = ConversationBufferMemory(memory_key="chat_history", output_key='answer', return_messages=True)
     retriever=vector_db.as_retriever()
-    progress(0.8, desc="Defining retrieval chain...")
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
         retriever=retriever,
         chain_type="stuff",
         memory=memory,
-        # combine_docs_chain_kwargs={"prompt": your_prompt})
         return_source_documents=True,
-        #return_generated_question=False,
         verbose=False,
     )
-    progress(0.9, desc="Done!")
     return qa_chain
 
-def start(llm_model, temperature, max_tokens, top_k,
-          vector_db, list_file_obj, chunk_size, chunk_overlap,
-          qa_chain, message, history):
-    # HuggingFaceHub uses HF inference endpoints
-    # Use of trust_remote_code as model_kwargs
-    # Warning: langchain issue
-    # URL: https://github.com/langchain-ai/langchain/issues/6080
-    llm = HuggingFaceHub(repo_id=llm_model, model_kwargs={"temperature": temperature,
-                                                          "max_new_tokens": max_tokens,
-                                                          "top_k": top_k,
-                                                          "load_in_8bit": True})
-    memory = ConversationBufferMemory(memory_key="chat_history",output_key='answer',return_messages=True)
 
-
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm,
-        retriever=retriever,
-        chain_type="stuff",
-        memory=memory,
-        # combine_docs_chain_kwargs={"prompt": your_prompt})
-        return_source_documents=True,
-        #return_generated_question=False,
-        verbose=False,
-    )
+vector_db, collection_name = initialize_database()
 
+#list_file_obj = document
+
+# Initialize database
+def initialize_database(list_file_obj):
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
-
     # Create collection_name for vector database
+    progress(0.1, desc="Creating collection name...")
     collection_name = Path(list_file_path[0]).stem
-
     # Fix potential issues from naming convention
     ## Remove space
     collection_name = collection_name.replace(" ","-")
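
The rewritten initialize_llmchain takes only vector_db and picks up llm_model, temperature, max_tokens, and top_k from the module-level settings added above. For context, a chain built this way is invoked with a question plus chat history and, because return_source_documents=True and output_key='answer', returns both the answer and the retrieved chunks. A hypothetical call (not part of the commit):

    response = qa_chain({"question": "What does chapter 2 cover?", "chat_history": []})
    response["answer"]            # generated answer
    response["source_documents"]  # retrieved chunks, each with .metadata (e.g. page number)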

@@ -169,13 +111,33 @@ def start(llm_model, temperature, max_tokens, top_k,
         collection_name[-1] = 'Z'
     # print('list_file_path: ', list_file_path)
     print('Collection name: ', collection_name)
-
+    progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
-
     # Create or load vector database
+    progress(0.5, desc="Generating vector database...")
+    # global vector_db
    vector_db = create_db(doc_splits, collection_name)
+    progress(0.9, desc="Done!")
+    return vector_db, collection_name
+
+
+def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
+    # print("llm_option",llm_option)
+    llm_name = llm_model
+    qa_chain = initialize_llmchain(llm_name, temperature, max_tokens, top_k, vector_db)
+    return qa_chain
+
 
+def format_chat_history(message, chat_history):
+    formatted_chat_history = []
+    for user_message, bot_message in chat_history:
+        formatted_chat_history.append(f"User: {user_message}")
+        formatted_chat_history.append(f"Assistant: {bot_message}")
+    return formatted_chat_history
+
+
+def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     #print("formatted_chat_history",formatted_chat_history)
 
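
A quick illustration of the new format_chat_history helper: history follows the (user, bot) tuple shape used by gr.Chatbot, and the message argument is accepted but not used in the formatting:

    history = [("What is this PDF about?", "It describes the training pipeline.")]
    format_chat_history("next question", history)
    # -> ['User: What is this PDF about?', 'Assistant: It describes the training pipeline.']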

@@ -197,17 +159,22 @@ def start(llm_model, temperature, max_tokens, top_k,
 
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
+    # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
+    return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
 
-
-
+document = os.listdir(list_file_path)
+vector_db, collection_name = initialize_database(document)
+qa_chain = initialize_LLM(vector_db)
+
+
 def demo():
-    with gr.Blocks(theme=
+    with gr.Blocks(theme='base') as demo:
         vector_db = gr.State()
         qa_chain = gr.State()
         collection_name = gr.State()
-
+
         chatbot = gr.Chatbot(height=300)
-        with gr.Accordion(
+        with gr.Accordion('References', open=True):
            with gr.Row():
                doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
                source1_page = gr.Number(label="Page", scale=1)

@@ -218,19 +185,18 @@ def demo():
                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
                source3_page = gr.Number(label="Page", scale=1)
        with gr.Row():
-            msg = gr.Textbox(placeholder=
+            msg = gr.Textbox(placeholder = 'Ask your question', container = True)
        with gr.Row():
-            submit_btn = gr.Button(
-
-
-
-
-
-
-
-
-
-            queue=False)
+            submit_btn = gr.Button('Submit')
+            clear_button = gr.ClearButton([msg, chatbot])
+
+
+
+
+        msg.submit(conversation, \
+            inputs=[qa_chain, msg, chatbot], \
+            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
+            queue=False)
        submit_btn.click(conversation, \
            inputs=[qa_chain, msg, chatbot], \
            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
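
Both event hooks added here follow the same Gradio pattern: an event (msg.submit or submit_btn.click) calls conversation with the listed input components, and the returned tuple is written back positionally to the outputs. A minimal self-contained sketch of that wiring, under hypothetical names (echo, demo_sketch):

    import gradio as gr

    def echo(message, history):
        history = history + [(message, message.upper())]
        return "", history  # clear the textbox, append to the chatbot

    with gr.Blocks() as demo_sketch:
        chatbot = gr.Chatbot(height=300)
        msg = gr.Textbox(placeholder='Ask your question')
        submit_btn = gr.Button('Submit')
        msg.submit(echo, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False)
        submit_btn.click(echo, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False)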

@@ -239,8 +205,5 @@ def demo():
            inputs=None, \
            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
            queue=False)
-
    demo.queue().launch(debug=True)
-
-if __name__ == "__main__":
-    demo()
+