Spaces:
Runtime error
Runtime error
| from typing import List, Type | |
| from langchain.docstore.document import Document | |
| from langchain.embeddings import HuggingFaceBgeEmbeddings | |
| from langchain.embeddings.base import Embeddings | |
| from langchain.vectorstores import VectorStore | |
| from langchain.vectorstores.faiss import FAISS | |
| from .debug import FakeEmbeddings, FakeVectorStore | |
| from .parsing import File | |
class FolderIndex:
    """Index for a collection of files (a folder).

    Attributes are set in ``__init__``:
      * ``name``  -- label for the index; always initialised to ``"default"``.
      * ``files`` -- the files the index was built from.
      * ``index`` -- the vector store holding the embedded documents.
    """

    def __init__(self, files: List[File], index: VectorStore):
        """Store the source ``files`` together with their vector ``index``."""
        self.name: str = "default"
        self.files = files
        self.index: VectorStore = index

    @staticmethod
    def _combine_files(files: List[File]) -> List[Document]:
        """Combine all the documents in a list of files into a single list.

        Each document is tagged *in place* with the ``file_name`` and
        ``file_id`` of the file it belongs to, so the same document objects
        held by ``file.docs`` carry that metadata afterwards.

        Declared as a static method because it takes neither ``self`` nor
        ``cls``; without the decorator, calling it through ``cls`` or an
        instance would bind ``files`` to the wrong object.
        """
        all_texts = []
        for file in files:
            for doc in file.docs:
                doc.metadata["file_name"] = file.name
                doc.metadata["file_id"] = file.id
                all_texts.append(doc)
        return all_texts

    @classmethod
    def from_files(
        cls, files: List[File], embeddings: Embeddings, vector_store: Type[VectorStore]
    ) -> "FolderIndex":
        """Create an index from files.

        Alternate constructor: flattens the documents of ``files``, hands
        them to ``vector_store.from_documents`` together with ``embeddings``,
        and wraps the resulting store in a new ``FolderIndex``.

        Declared as a class method so that it can be invoked directly on the
        class (``FolderIndex.from_files(...)``) without an instance.
        """
        all_docs = cls._combine_files(files)
        index = vector_store.from_documents(
            documents=all_docs,
            embedding=embeddings,
        )
        return cls(files=files, index=index)
def embed_files(
    files: List[File], embedding: str, vector_store: str, **kwargs
) -> FolderIndex:
    """Embed ``files`` and return a ``FolderIndex`` built on top of them.

    NOTE: ``embedding``, ``vector_store`` and any extra ``kwargs`` are
    accepted but not consulted here. The function always embeds with the
    ``BAAI/bge-small-en`` BGE model on CPU and always indexes with FAISS.
    """
    bge_embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        # set True to compute cosine similarity
        encode_kwargs={"normalize_embeddings": True},
    )
    return FolderIndex.from_files(
        files=files,
        embeddings=bge_embeddings,
        vector_store=FAISS,
    )