Update utils.py
Browse files
utils.py
CHANGED
|
@@ -35,7 +35,8 @@ from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordD
|
|
| 35 |
from langchain.schema import AIMessage, HumanMessage
|
| 36 |
from langchain_community.llms import HuggingFaceHub
|
| 37 |
from langchain_community.llms import HuggingFaceTextGenInference
|
| 38 |
-
from langchain_community.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
|
|
|
| 39 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 40 |
from typing import Dict, TypedDict
|
| 41 |
from langchain_core.messages import BaseMessage
|
|
@@ -224,9 +225,9 @@ def document_storage_chroma(splits):
|
|
| 224 |
def document_retrieval_chroma(llm, prompt):
|
| 225 |
#HF embeddings -----------------------------------
|
| 226 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
| 227 |
-
embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 228 |
#etwas weniger rechenaufwendig:
|
| 229 |
-
|
| 230 |
|
| 231 |
#ChromaDB, um die Embeddings zu speichern
|
| 232 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
|
|
|
| 35 |
from langchain.schema import AIMessage, HumanMessage
|
| 36 |
from langchain_community.llms import HuggingFaceHub
|
| 37 |
from langchain_community.llms import HuggingFaceTextGenInference
|
| 38 |
+
#from langchain_community.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
| 39 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 40 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 41 |
from typing import Dict, TypedDict
|
| 42 |
from langchain_core.messages import BaseMessage
|
|
|
|
| 225 |
def document_retrieval_chroma(llm, prompt):
|
| 226 |
#HF embeddings -----------------------------------
|
| 227 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
| 228 |
+
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 229 |
#etwas weniger rechenaufwendig:
|
| 230 |
+
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 231 |
|
| 232 |
#ChromaDB, um die Embeddings zu speichern
|
| 233 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|