added EvalDataset Generation
Browse files
app.py
CHANGED
|
@@ -238,7 +238,7 @@ class BSIChatbot:
|
|
| 238 |
#newprint("Saving Embeddings took", end-start, "seconds!")
|
| 239 |
else:
|
| 240 |
start = time.time()
|
| 241 |
-
if vectorstore
|
| 242 |
vectorstore = FAISS.load_local(self.embedPath, self.embedding_model, allow_dangerous_deserialization=True)
|
| 243 |
#self.vectorstore.index = index_gpu
|
| 244 |
end = time.time()
|
|
@@ -280,7 +280,7 @@ class BSIChatbot:
|
|
| 280 |
#print(vectorstore.index_to_docstore_id)
|
| 281 |
#newprint(vectorstore)
|
| 282 |
# Iteriere über alle IDs im index_to_docstore_id
|
| 283 |
-
if docstore
|
| 284 |
docstore = vectorstore.docstore._dict.values()
|
| 285 |
|
| 286 |
#for doc_id in vectorstore.index_to_docstore_id.values():
|
|
@@ -336,14 +336,15 @@ class BSIChatbot:
|
|
| 336 |
global rerankingModel
|
| 337 |
if hybridSearch == True:
|
| 338 |
allDocs = self.retrieveDocFromFaiss()
|
| 339 |
-
if bm25_retriever
|
| 340 |
bm25_retriever = BM25Retriever.from_documents(allDocs)
|
| 341 |
#TODO!
|
| 342 |
retriever_k=15
|
| 343 |
bm25_retriever.k= retriever_k
|
| 344 |
vectordb = vectorstore.as_retriever(search_kwargs={"k":retriever_k})
|
| 345 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, vectordb], weights=[0.5, 0.5])
|
| 346 |
-
retrieved_chunks = ensemble_retriever.get_relevant_documents(query)
|
|
|
|
| 347 |
#newprint("DBG: Number of Chunks retrieved")
|
| 348 |
#newprint(len(retrieved_chunks))
|
| 349 |
else:
|
|
@@ -364,7 +365,7 @@ class BSIChatbot:
|
|
| 364 |
i = i + 1
|
| 365 |
|
| 366 |
if rerankingStep == True:
|
| 367 |
-
if rerankingModel
|
| 368 |
print("initializing Reranker-Model..")
|
| 369 |
self.initializeRerankingModel()
|
| 370 |
print("Starting Reranking Chunks...")
|
|
|
|
| 238 |
#newprint("Saving Embeddings took", end-start, "seconds!")
|
| 239 |
else:
|
| 240 |
start = time.time()
|
| 241 |
+
if vectorstore is None:
|
| 242 |
vectorstore = FAISS.load_local(self.embedPath, self.embedding_model, allow_dangerous_deserialization=True)
|
| 243 |
#self.vectorstore.index = index_gpu
|
| 244 |
end = time.time()
|
|
|
|
| 280 |
#print(vectorstore.index_to_docstore_id)
|
| 281 |
#newprint(vectorstore)
|
| 282 |
# Iteriere über alle IDs im index_to_docstore_id
|
| 283 |
+
if docstore is None:
|
| 284 |
docstore = vectorstore.docstore._dict.values()
|
| 285 |
|
| 286 |
#for doc_id in vectorstore.index_to_docstore_id.values():
|
|
|
|
| 336 |
global rerankingModel
|
| 337 |
if hybridSearch == True:
|
| 338 |
allDocs = self.retrieveDocFromFaiss()
|
| 339 |
+
if bm25_retriever is None:
|
| 340 |
bm25_retriever = BM25Retriever.from_documents(allDocs)
|
| 341 |
#TODO!
|
| 342 |
retriever_k=15
|
| 343 |
bm25_retriever.k= retriever_k
|
| 344 |
vectordb = vectorstore.as_retriever(search_kwargs={"k":retriever_k})
|
| 345 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, vectordb], weights=[0.5, 0.5])
|
| 346 |
+
retrieved_chunks = ensemble_retriever.invoke(query)
|
| 347 |
+
#retrieved_chunks = ensemble_retriever.get_relevant_documents(query)
|
| 348 |
#newprint("DBG: Number of Chunks retrieved")
|
| 349 |
#newprint(len(retrieved_chunks))
|
| 350 |
else:
|
|
|
|
| 365 |
i = i + 1
|
| 366 |
|
| 367 |
if rerankingStep == True:
|
| 368 |
+
if rerankingModel is None:
|
| 369 |
print("initializing Reranker-Model..")
|
| 370 |
self.initializeRerankingModel()
|
| 371 |
print("Starting Reranking Chunks...")
|