Spaces:
Build error
Build error
| #from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.llms import OpenAI | |
| from langchain.chains import ConversationalRetrievalChain, RetrievalQA | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| from langchain.document_loaders import TextLoader, PyPDFLoader | |
| from typing import Optional | |
| import os | |
# Name of the sentence-transformers model used to build the embedding function.
embeddings_model_name = "multi-qa-MiniLM-L6-cos-v1"

# Directory handed to Chroma when the index is written to disk.
persist_directory = "db"

# NOTE(review): not referenced in this part of the file; presumably the number
# of chunks to retrieve per query -- confirm against the consumer.
target_source_chunks = 4

# Read from the environment; None when OPENAI_API_KEY is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')

# Shared embedding function, also used as the default for the PDF loader below.
# (A HuggingFaceEmbeddings(model_name=embeddings_model_name) variant was
# previously used here and is left disabled.)
embeddings = SentenceTransformerEmbeddings(model_name=embeddings_model_name)
def load_vectorestore_from_pdf(
    path: str,
    embeddings=embeddings,
    persist: Optional[bool] = True,
) -> Optional[Chroma]:
    """Load a PDF, split it into chunks and index the chunks with Chroma.

    Args:
        path: Path of the PDF file, passed to ``PyPDFLoader``.
        embeddings: Embedding function passed to ``Chroma.from_documents``.
            Defaults to the module-level ``embeddings`` object (bound when
            this function is defined).
        persist: When truthy, the index is built with the module-level
            ``persist_directory`` and ``persist()`` is called on it. When
            falsy (``False`` or ``None``), the index is built with no
            persist directory and handed back to the caller.

    Returns:
        The Chroma vector store when ``persist`` is falsy; ``None`` when the
        index was persisted (callers are expected to reopen it from
        ``persist_directory``).
    """
    raw_documents = PyPDFLoader(path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(raw_documents)

    # Guard clause: hand back a store that is not tied to a directory.
    if not persist:
        return Chroma.from_documents(chunks, embeddings, persist_directory=None)

    store = Chroma.from_documents(
        chunks, embeddings, persist_directory=persist_directory
    )
    store.persist()
    # The persisted store is deliberately not returned; the local reference
    # is dropped automatically when the function exits.
    return None
if __name__ == "__main__":
    import sys

    # load_vectorestore_from_pdf() requires a PDF path; calling it with no
    # arguments raised TypeError, so take the path from the command line.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-pdf>")
    load_vectorestore_from_pdf(sys.argv[1])