Spaces:

PabloVD
/

CAMELSDocBot

Sleeping

App Files Files Community

PabloVD committed on Oct 29, 2024

Commit

fe4c7e0

1 Parent(s): edb1388

Clean and try different embedding

Browse files

Files changed (3) hide show

README.md +0 -1
app.py +1 -1
worker.py +9 -8

README.md CHANGED Viewed

@@ -9,7 +9,6 @@ app_file: app.py
 pinned: false
 license: mit
 short_description: Chatbot assistant for the CAMELS simulations documentation
-python_version: 3.8
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 pinned: false
 license: mit
 short_description: Chatbot assistant for the CAMELS simulations documentation
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ document_path.write_bytes(r.content)
 worker.process_document(document_path)
 def handle_prompt(message, history):
-    bot_response = worker.process_prompt(message)
     return bot_response
 greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"

 worker.process_document(document_path)
 def handle_prompt(message, history):
+    bot_response = worker.process_prompt(message, history)
     return bot_response
 greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"

worker.py CHANGED Viewed

@@ -15,7 +15,7 @@ def install(package):
         pip._internal.main(['install', package])
 # Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
-install("sentence-transformers==2.2.2")
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -37,9 +37,9 @@ def init_llm():
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
-    # model_id = "microsoft/Phi-3.5-mini-instruct"
     # model_id = "meta-llama/Llama-3.2-1B-Instruct"
-    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     # load the model into the HuggingFaceHub
     llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
@@ -47,8 +47,8 @@ def init_llm():
     # llm_hub.invoke('foo bar')
     #Initialize embeddings using a pre-trained model to represent the text data.
-    embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
-    # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
     embeddings = HuggingFaceInstructEmbeddings(
         model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
@@ -75,10 +75,11 @@ def process_document(document_path):
     # By default, the vectorstore retriever uses similarity search.
     # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
     # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
     conversation_retrieval_chain = RetrievalQA.from_chain_type(
         llm=llm_hub,
         chain_type="stuff",
-        retriever=db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25}),
         return_source_documents=False,
         input_key = "question"
      #   chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
@@ -86,9 +87,9 @@ def process_document(document_path):
 # Function to process a user prompt
-def process_prompt(prompt):
     global conversation_retrieval_chain
-    global chat_history
     # Query the model
     output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})

         pip._internal.main(['install', package])
 # Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
+# install("sentence-transformers==2.2.2")
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
+    model_id = "microsoft/Phi-3.5-mini-instruct"
     # model_id = "meta-llama/Llama-3.2-1B-Instruct"
+    # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     # load the model into the HuggingFaceHub
     llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
     # llm_hub.invoke('foo bar')
     #Initialize embeddings using a pre-trained model to represent the text data.
+    # embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
+    embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
     embeddings = HuggingFaceInstructEmbeddings(
         model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
     # By default, the vectorstore retriever uses similarity search.
     # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
     # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
+    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
     conversation_retrieval_chain = RetrievalQA.from_chain_type(
         llm=llm_hub,
         chain_type="stuff",
+        retriever=retriever,
         return_source_documents=False,
         input_key = "question"
      #   chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
 # Function to process a user prompt
+def process_prompt(prompt, chat_history):
     global conversation_retrieval_chain
+    # global chat_history
     # Query the model
     output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})