Spaces:
Build error
Build error
| import os | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import SentenceTransformerEmbeddings | |
| from langchain.document_loaders import DirectoryLoader | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.vectorstores import Qdrant | |
| from qdrant_client import QdrantClient | |
# Ingestion script: load every PDF under data/, split the pages into
# overlapping chunks, embed them, and upload the vectors to the Qdrant
# collection "vector_db".

# Sentence-transformers model used to embed each text chunk.
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# Connection settings are read from the environment.
# NOTE(review): the fallback host looks like a redacted placeholder --
# confirm QDRANT_URL is always set wherever this script runs.
qdrant_url = os.getenv("QDRANT_URL", "https://QDRANT_URL.europe-west3-0.gcp.cloud.qdrant.io")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

# Stand-alone client built from the same settings. It is kept as a
# module-level name for backward compatibility; the vector store below is
# given the connection settings directly rather than this object.
client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
    prefer_grpc=False,
)

# Recursively collect all PDFs below data/ (one Document per page).
loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader)
documents = loader.load()

# 1000-character chunks with a 100-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# Qdrant.from_documents creates its own client from connection keyword
# arguments (url / api_key / prefer_grpc). A pre-built client passed as
# `client=` is not used as the connection, so the URL and API key must be
# handed over explicitly for the upload to reach the configured instance.
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=qdrant_url,
    api_key=qdrant_api_key,
    prefer_grpc=False,
    collection_name="vector_db",
)
#print("Vector DB Successfully Created!")