# NOTE(review): removed non-Python page-scrape residue (site header, file-size
# line, commit-hash ruler, column numbers) that preceded the module and broke
# the Python syntax.
import os
import faiss
from typing import List, Optional
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.schema import Document
from app.models.model import Embedding_model_en, Embedding_model_de
from app import config
# Root directory under which all FAISS indexes are persisted.
BASE_PATH = "vectorstore"
# Maps a UI language name to its on-disk index directory beneath BASE_PATH.
VECTORSTORE_TYPES = {
"English": "English_index",
"Deutsch": "Deutsch_index"
}
def get_embedding_model(language):
    """Return the embedding model for the given UI language.

    "English" selects the English model; any other value falls back to
    the German model.
    """
    if language == "English":
        return Embedding_model_en
    return Embedding_model_de
def create_new_vectorstore(embedding_model):
    """Build an empty FAISS vectorstore backed by `embedding_model`.

    Probes the model with a sample query once to discover the embedding
    dimensionality, then wires up a flat L2 index with an empty docstore.
    """
    sample_vector = embedding_model.embed_query("hello world")
    flat_index = faiss.IndexFlatL2(len(sample_vector))
    return FAISS(
        embedding_function=embedding_model,
        index=flat_index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )
def load_vectorstore(store_type: str) -> FAISS:
    """Load the FAISS vectorstore for a language, creating it on first use.

    Args:
        store_type: A key of ``VECTORSTORE_TYPES`` ("English" or "Deutsch").

    Returns:
        The persisted vectorstore if one exists on disk, otherwise a freshly
        created (and immediately saved) empty one.

    Raises:
        ValueError: If ``store_type`` is not a known vectorstore type.
    """
    # `assert` is stripped under `python -O`, so validate explicitly.
    if store_type not in VECTORSTORE_TYPES:
        raise ValueError(f"Invalid vectorstore type: {store_type!r}")
    path = os.path.join(BASE_PATH, VECTORSTORE_TYPES[store_type])
    print(f"Load vectorstore from language {store_type}")
    if os.path.exists(os.path.join(path, 'index.faiss')):
        print("Reload existing faiss")
        # Deserializes a pickled docstore; only acceptable because these
        # files are written locally by save_vectorstore(), never untrusted.
        return FAISS.load_local(path, get_embedding_model(store_type), allow_dangerous_deserialization=True)
    else:
        print("Create new faiss")
        vs = create_new_vectorstore(get_embedding_model(store_type))
        save_vectorstore(vs, store_type)
        return vs
def save_vectorstore(vectorstore: FAISS, store_type: str):
    """Persist `vectorstore` to the on-disk directory for `store_type`."""
    target_dir = os.path.join(BASE_PATH, VECTORSTORE_TYPES[store_type])
    vectorstore.save_local(target_dir)
def add_document(content: str, metadata: dict, store_type: str):
    """Add one document to the vectorstore for `store_type` and persist it.

    Args:
        content: The document's text body.
        metadata: Document metadata; its "type" entry must equal `store_type`.
        store_type: Target vectorstore key ("English" or "Deutsch").

    Raises:
        ValueError: If ``metadata["type"]`` does not match ``store_type``.
    """
    # `assert` is stripped under `python -O`; raise an explicit, descriptive
    # error so mismatched metadata always fails loudly.
    if metadata.get("type") != store_type:
        raise ValueError(
            f"metadata['type'] ({metadata.get('type')!r}) does not match "
            f"store_type ({store_type!r})"
        )
    vs = load_vectorstore(store_type)
    doc = Document(page_content=content, metadata=metadata)
    vs.add_documents([doc])
    save_vectorstore(vs, store_type)
def add_multi_documents(processed_docs: list, store_type: str):
    """Add a batch of already-built Document objects and persist the store."""
    store = load_vectorstore(store_type)
    store.add_documents(processed_docs)
    save_vectorstore(store, store_type)
def get_relevant_documents(store_type, query: str, top_k: int = 10) -> List[Document]:
    """Return up to `top_k` documents most similar to `query`.

    Loads the vectorstore for `store_type` and runs a similarity search.
    """
    store = load_vectorstore(store_type)
    matches = store.similarity_search(query, k=top_k)
    return matches