Update utils.py
Browse files
utils.py
CHANGED
|
@@ -386,26 +386,8 @@ def document_storage_chroma(splits):
|
|
| 386 |
|
| 387 |
# Vectorstore initialisieren und Dokumente hinzufügen
|
| 388 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn) #, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 389 |
-
retriever = vectorstore.as_retriever(search_kwargs = {"k": ANZAHL_DOCS})
|
| 390 |
-
# Persist the vectorstore to disk
|
| 391 |
-
#vectorstore.persist()
|
| 392 |
|
| 393 |
-
return vectorstore
|
| 394 |
-
|
| 395 |
-
############################################
|
| 396 |
-
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
| 397 |
-
"""
|
| 398 |
-
def document_retrieval_chroma(llm, prompt):
|
| 399 |
-
#HF embeddings -----------------------------------
|
| 400 |
-
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
| 401 |
-
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 402 |
-
#etwas weniger rechenaufwendig:
|
| 403 |
-
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODELL, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 404 |
-
|
| 405 |
-
#ChromaDb um die embeddings zu speichern
|
| 406 |
-
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 407 |
-
return db
|
| 408 |
-
"""
|
| 409 |
|
| 410 |
|
| 411 |
|
|
|
|
| 386 |
|
| 387 |
# Vectorstore initialisieren und Dokumente hinzufügen
|
| 388 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn) #, persist_directory = PATH_WORK + CHROMA_DIR)
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
+
return vectorstore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
|
| 393 |
|