Spaces:
Sleeping
Sleeping
"""Load a fixed set of PDF files, chunk them, and store the chunks in Qdrant.

Connection settings are read from the environment (optionally via a ``.env``
file): ``QDRANT_URL`` and ``QDRANT_API_KEY``.
"""

import os

from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Single source of truth for values that were previously repeated literals.
COLLECTION_NAME = "multidoc-rag-agent"
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
# Must equal the length of the vectors produced by EMBEDDING_MODEL; Qdrant
# rejects points whose vector size differs from the collection's configuration.
VECTOR_SIZE = 768
# PDFs to ingest, resolved relative to the current working directory.
PDF_PATHS = ("sam-a16.pdf", "sam-s25.pdf", "sam-fold.pdf")

load_dotenv()
url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
client = QdrantClient(url=url, api_key=api_key)

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Load every PDF and concatenate the resulting documents in PDF_PATHS order.
docs = []
for pdf_path in PDF_PATHS:
    docs.extend(PyPDFLoader(pdf_path).load())

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

# Only create the collection when it is missing: an unconditional
# create_collection() fails on the second run because the collection
# already exists, aborting the script before any document is stored.
if not client.collection_exists(COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
    )

print(f"Split PDFs into {len(all_splits)} sub-documents.")

# NOTE(review): re-running against an existing collection presumably appends
# the same chunks again (no stable ids are passed) - confirm whether
# de-duplication is wanted.
vector_store = QdrantVectorStore(
    client=client,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)
vector_store.add_documents(all_splits)
print("Documents stored in Qdrant.")