Spaces:
Sleeping
Sleeping
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from constants import persist_directory

# Ingestion script: load the PDFs found under docs/, split them into
# overlapping chunks, embed the chunks with OpenAIEmbeddings and store the
# result in a Chroma index located at persist_directory.

# Step 1: load the source documents from the docs/ directory.
loader = PyPDFDirectoryLoader("docs/")
documents = loader.load()

# Step 2: split the loaded documents into chunks. The separator list goes
# from paragraph breaks down to the empty string, and separators are kept in
# the resulting chunks (keep_separator=True).
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "!", ",", " ", ""],
    keep_separator=True,
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

# Step 3: embed the chunks and build the Chroma store from them.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Step 4: persist the store (presumably writes it under persist_directory;
# confirm against the installed Chroma/LangChain version).
vectordb.persist()