Nelly-43 committed on
Commit
e20e7ae
·
verified ·
1 Parent(s): 0ec03fb

Update create_retriever.py

Browse files
Files changed (1) hide show
  1. create_retriever.py +6 -8
create_retriever.py CHANGED
@@ -75,7 +75,7 @@ def get_text_splitter(splitter_type='character', chunk_size=500, chunk_overlap=3
75
  raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
76
 
77
  # Retriever using Chroma and HuggingFace embeddings
78
- def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
79
  # Load or download the embedding model
80
  embeddings = get_embedding_model(model_name)
81
 
@@ -84,18 +84,16 @@ def retriever_chroma(flag, model_name="BAAI/bge-large-en-v1.5", splitter_type='c
84
  all_doc = docs_return(0)
85
 
86
  # Use the splitter parameters
87
- text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator, max_tokens=max_tokens)
88
 
89
  # Split the documents using the text splitter
90
- docs = text_splitter.split_documents(documents=all_doc)
91
 
92
  # Create a Chroma vector database
93
- vectordb = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
94
-
95
  # Create the retriever
96
- chroma_retriever = vectordb.as_retriever(
97
- search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}
98
- )
99
  return chroma_retriever
100
  else:
101
  # Load a local Chroma vectorstore
 
75
  raise ValueError("Unsupported splitter type. Choose from 'character', 'recursive', or 'token'.")
76
 
77
  # Retriever using Chroma and HuggingFace embeddings
78
+ def retriever_chroma(flag, model_name="sentence-transformers/all-mpnet-base-v2", splitter_type='character', chunk_size=500, chunk_overlap=30, separator="\n", max_tokens=1000):
79
  # Load or download the embedding model
80
  embeddings = get_embedding_model(model_name)
81
 
 
84
  all_doc = docs_return(0)
85
 
86
  # Use the splitter parameters
87
+ # text_splitter = get_text_splitter(splitter_type=splitter_type, chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator, max_tokens=max_tokens)
88
 
89
  # Split the documents using the text splitter
90
+ # docs = text_splitter.split_documents(documents=all_doc)
91
 
92
  # Create a Chroma vector database
93
+ vectordb = Chroma.from_documents(all_doc, embeddings, persist_directory="./chroma_db")
94
+ # print(all_doc)
95
  # Create the retriever
96
+ chroma_retriever = vectordb.as_retriever()
 
 
97
  return chroma_retriever
98
  else:
99
  # Load a local Chroma vectorstore