Update create_retriever.py
Browse files
- create_retriever.py +6 -8
create_retriever.py
CHANGED
|
@@ -75,7 +75,7 @@ def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=3
|
|
| 75 |
raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
|
| 76 |
|
| 77 |
# Retriever using Chroma and HuggingFace embeddings
|
| 78 |
-
def retriever_chroma(flag, model_name="
|
| 79 |
# Load or download the embedding model
|
| 80 |
embeddings = get_embedding_model(model_name)
|
| 81 |
|
|
@@ -84,18 +84,16 @@ def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='c
|
|
| 84 |
all_doc = docs_return(0)
|
| 85 |
|
| 86 |
# Use the splitter parameters
|
| 87 |
-
text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator, max_tokens=max_tokens)
|
| 88 |
|
| 89 |
# Split the documents using the text splitter
|
| 90 |
-
docs = text_splitter.split_documents(documents=all_doc)
|
| 91 |
|
| 92 |
# Create a Chroma vector database
|
| 93 |
-
vectordb = Chroma.from_documents(
|
| 94 |
-
|
| 95 |
# Create the retriever
|
| 96 |
-
chroma_retriever = vectordb.as_retriever(
|
| 97 |
-
search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
|
| 98 |
-
)
|
| 99 |
return chroma_retriever
|
| 100 |
else:
|
| 101 |
# Load a local Chroma vectorstore
|
|
|
|
| 75 |
raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
|
| 76 |
|
| 77 |
# Retriever using Chroma and HuggingFace embeddings
|
| 78 |
+
def retriever_chroma(flag, model_name="sentence-transformers/all-mpnet-base-v2", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
|
| 79 |
# Load or download the embedding model
|
| 80 |
embeddings = get_embedding_model(model_name)
|
| 81 |
|
|
|
|
| 84 |
all_doc = docs_return(0)
|
| 85 |
|
| 86 |
# Use the splitter parameters
|
| 87 |
+
# text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator, max_tokens=max_tokens)
|
| 88 |
|
| 89 |
# Split the documents using the text splitter
|
| 90 |
+
# docs = text_splitter.split_documents(documents=all_doc)
|
| 91 |
|
| 92 |
# Create a Chroma vector database
|
| 93 |
+
vectordb = Chroma.from_documents(all_doc, embeddings, persist_directory="./chroma_db")
|
| 94 |
+
# print(all_doc)
|
| 95 |
# Create the retriever
|
| 96 |
+
chroma_retriever = vectordb.as_retriever()
|
|
|
|
|
|
|
| 97 |
return chroma_retriever
|
| 98 |
else:
|
| 99 |
# Load a local Chroma vectorstore
|