Spaces:
Sleeping
Sleeping
Clean and try different embedding
Browse files
README.md
CHANGED
|
@@ -9,7 +9,6 @@ app_file: app.py
|
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
short_description: Chatbot assistant for the CAMELS simulations documentation
|
| 12 |
-
python_version: 3.8
|
| 13 |
---
|
| 14 |
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
short_description: Chatbot assistant for the CAMELS simulations documentation
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -14,7 +14,7 @@ document_path.write_bytes(r.content)
|
|
| 14 |
worker.process_document(document_path)
|
| 15 |
|
| 16 |
def handle_prompt(message, history):
|
| 17 |
-
bot_response = worker.process_prompt(message)
|
| 18 |
return bot_response
|
| 19 |
|
| 20 |
greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
|
|
|
|
| 14 |
worker.process_document(document_path)
|
| 15 |
|
| 16 |
def handle_prompt(message, history):
|
| 17 |
+
bot_response = worker.process_prompt(message, history)
|
| 18 |
return bot_response
|
| 19 |
|
| 20 |
greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
|
worker.py
CHANGED
|
@@ -15,7 +15,7 @@ def install(package):
|
|
| 15 |
pip._internal.main(['install', package])
|
| 16 |
|
| 17 |
# Temporary fix for the incompatibility between langchain_huggingface and sentence-transformers<2.6
|
| 18 |
-
install("sentence-transformers==2.2.2")
|
| 19 |
|
| 20 |
# Check for GPU availability and set the appropriate device for computation.
|
| 21 |
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
@@ -37,9 +37,9 @@ def init_llm():
|
|
| 37 |
|
| 38 |
# repo name for the model
|
| 39 |
# model_id = "tiiuae/falcon-7b-instruct"
|
| 40 |
-
|
| 41 |
# model_id = "meta-llama/Llama-3.2-1B-Instruct"
|
| 42 |
-
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 43 |
|
| 44 |
# load the model into the HuggingFaceHub
|
| 45 |
llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
|
|
@@ -47,8 +47,8 @@ def init_llm():
|
|
| 47 |
# llm_hub.invoke('foo bar')
|
| 48 |
|
| 49 |
#Initialize embeddings using a pre-trained model to represent the text data.
|
| 50 |
-
embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
|
| 51 |
-
|
| 52 |
embeddings = HuggingFaceInstructEmbeddings(
|
| 53 |
model_name=embedddings_model,
|
| 54 |
model_kwargs={"device": DEVICE}
|
|
@@ -75,10 +75,11 @@ def process_document(document_path):
|
|
| 75 |
# By default, the vectorstore retriever uses similarity search.
|
| 76 |
# If the underlying vectorstore supports maximal marginal relevance search, you can specify that as the search type (search_type="mmr").
|
| 77 |
# You can also specify search kwargs such as k to use when doing retrieval. k represents how many search results are sent to the LLM.
|
|
|
|
| 78 |
conversation_retrieval_chain = RetrievalQA.from_chain_type(
|
| 79 |
llm=llm_hub,
|
| 80 |
chain_type="stuff",
|
| 81 |
-
retriever=
|
| 82 |
return_source_documents=False,
|
| 83 |
input_key = "question"
|
| 84 |
# chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
|
|
@@ -86,9 +87,9 @@ def process_document(document_path):
|
|
| 86 |
|
| 87 |
|
| 88 |
# Function to process a user prompt
|
| 89 |
-
def process_prompt(prompt):
|
| 90 |
global conversation_retrieval_chain
|
| 91 |
-
global chat_history
|
| 92 |
|
| 93 |
# Query the model
|
| 94 |
output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
|
|
|
|
| 15 |
pip._internal.main(['install', package])
|
| 16 |
|
| 17 |
# Temporary fix for the incompatibility between langchain_huggingface and sentence-transformers<2.6
|
| 18 |
+
# install("sentence-transformers==2.2.2")
|
| 19 |
|
| 20 |
# Check for GPU availability and set the appropriate device for computation.
|
| 21 |
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 37 |
|
| 38 |
# repo name for the model
|
| 39 |
# model_id = "tiiuae/falcon-7b-instruct"
|
| 40 |
+
model_id = "microsoft/Phi-3.5-mini-instruct"
|
| 41 |
# model_id = "meta-llama/Llama-3.2-1B-Instruct"
|
| 42 |
+
# model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 43 |
|
| 44 |
# load the model into the HuggingFaceHub
|
| 45 |
llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
|
|
|
|
| 47 |
# llm_hub.invoke('foo bar')
|
| 48 |
|
| 49 |
#Initialize embeddings using a pre-trained model to represent the text data.
|
| 50 |
+
# embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
|
| 51 |
+
embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
|
| 52 |
embeddings = HuggingFaceInstructEmbeddings(
|
| 53 |
model_name=embedddings_model,
|
| 54 |
model_kwargs={"device": DEVICE}
|
|
|
|
| 75 |
# By default, the vectorstore retriever uses similarity search.
|
| 76 |
# If the underlying vectorstore supports maximal marginal relevance search, you can specify that as the search type (search_type="mmr").
|
| 77 |
# You can also specify search kwargs such as k to use when doing retrieval. k represents how many search results are sent to the LLM.
|
| 78 |
+
retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
|
| 79 |
conversation_retrieval_chain = RetrievalQA.from_chain_type(
|
| 80 |
llm=llm_hub,
|
| 81 |
chain_type="stuff",
|
| 82 |
+
retriever=retriever,
|
| 83 |
return_source_documents=False,
|
| 84 |
input_key = "question"
|
| 85 |
# chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
# Function to process a user prompt
|
| 90 |
+
def process_prompt(prompt, chat_history):
|
| 91 |
global conversation_retrieval_chain
|
| 92 |
+
# global chat_history
|
| 93 |
|
| 94 |
# Query the model
|
| 95 |
output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
|