Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,18 +14,13 @@ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLo
|
|
| 14 |
from langchain.document_loaders.generic import GenericLoader
|
| 15 |
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
| 16 |
from langchain.schema import AIMessage, HumanMessage
|
| 17 |
-
from langchain.llms import HuggingFaceHub
|
| 18 |
-
from langchain.llms import HuggingFaceTextGenInference
|
| 19 |
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
| 20 |
|
| 21 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 22 |
-
from langchain.prompts import PromptTemplate
|
| 23 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 24 |
from langchain.vectorstores import Chroma
|
| 25 |
from chromadb.errors import InvalidDimensionException
|
| 26 |
|
| 27 |
-
#from langchain.vectorstores import MongoDBAtlasVectorSearch
|
| 28 |
-
#from pymongo import MongoClient
|
| 29 |
|
| 30 |
from dotenv import load_dotenv, find_dotenv
|
| 31 |
_ = load_dotenv(find_dotenv())
|
|
@@ -44,14 +39,6 @@ template = """Antworte in deutsch, wenn es nicht explizit anders gefordert wird.
|
|
| 44 |
llm_template = "Beantworte die Frage am Ende. " + template + "Frage: {question} Hilfreiche Antwort: "
|
| 45 |
rag_template = "Nutze die folgenden Kontext Teile, um die Frage zu beantworten am Ende. " + template + "{context} Frage: {question} Hilfreiche Antwort: "
|
| 46 |
|
| 47 |
-
#################################################
|
| 48 |
-
#Prompts - Zusammensetzung
|
| 49 |
-
#################################################
|
| 50 |
-
LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"],
|
| 51 |
-
template = llm_template)
|
| 52 |
-
#mit RAG
|
| 53 |
-
RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
|
| 54 |
-
template = rag_template)
|
| 55 |
|
| 56 |
#################################################
|
| 57 |
# Konstanten
|
|
@@ -170,12 +157,6 @@ def document_storage_chroma(splits):
|
|
| 170 |
#HF embeddings--------------------------------------
|
| 171 |
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
| 172 |
|
| 173 |
-
#Mongo DB die splits ablegen - vektorisiert...
|
| 174 |
-
def document_storage_mongodb(splits):
|
| 175 |
-
MongoDBAtlasVectorSearch.from_documents(documents = splits,
|
| 176 |
-
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
| 177 |
-
collection = MONGODB_COLLECTION,
|
| 178 |
-
index_name = MONGODB_INDEX_NAME)
|
| 179 |
|
| 180 |
#Vektorstore vorbereiten...
|
| 181 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
|
@@ -193,14 +174,6 @@ def document_retrieval_chroma(llm, prompt):
|
|
| 193 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 194 |
return db
|
| 195 |
|
| 196 |
-
#dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
|
| 197 |
-
def document_retrieval_mongodb(llm, prompt):
|
| 198 |
-
db = MongoDBAtlasVectorSearch.from_connection_string(MONGODB_URI,
|
| 199 |
-
MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
|
| 200 |
-
OpenAIEmbeddings(disallowed_special = ()),
|
| 201 |
-
index_name = MONGODB_INDEX_NAME)
|
| 202 |
-
return db
|
| 203 |
-
|
| 204 |
|
| 205 |
###############################################
|
| 206 |
#Langchain anlegen
|
|
|
|
| 14 |
from langchain.document_loaders.generic import GenericLoader
|
| 15 |
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
| 16 |
from langchain.schema import AIMessage, HumanMessage
|
|
|
|
|
|
|
| 17 |
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
| 18 |
|
| 19 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
|
| 20 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 21 |
from langchain.vectorstores import Chroma
|
| 22 |
from chromadb.errors import InvalidDimensionException
|
| 23 |
|
|
|
|
|
|
|
| 24 |
|
| 25 |
from dotenv import load_dotenv, find_dotenv
|
| 26 |
_ = load_dotenv(find_dotenv())
|
|
|
|
| 39 |
llm_template = "Beantworte die Frage am Ende. " + template + "Frage: {question} Hilfreiche Antwort: "
|
| 40 |
rag_template = "Nutze die folgenden Kontext Teile, um die Frage zu beantworten am Ende. " + template + "{context} Frage: {question} Hilfreiche Antwort: "
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
#################################################
|
| 44 |
# Konstanten
|
|
|
|
| 157 |
#HF embeddings--------------------------------------
|
| 158 |
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
| 159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
#Vektorstore vorbereiten...
|
| 162 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
|
|
|
| 174 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
| 175 |
return db
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
###############################################
|
| 179 |
#Langchain anlegen
|