# Spaces: Sleeping  (page-status text that was captured along with this listing; not part of the script)
"""Load a PDF, split it into chunks, embed the chunks and store them in MongoDB.

Configuration is read from the environment:

* ``MONGO_URI``      -- connection string handed to ``MongoClient``.
* ``OPENAI_API_KEY`` -- key handed to ``OpenAIEmbeddings``.

Running this module performs network I/O: it downloads the PDF, calls the
embedding API and writes into the ``langchain_demo.collection_of_text_blobs``
collection.
"""
import os

from pymongo import MongoClient
# NOTE: the older import paths (langchain.embeddings.openai,
# langchain.vectorstores, langchain.document_loaders) have been erroring
# since Jan 2024; the langchain_openai / langchain_community paths below
# replace them.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

mongo_uri = os.getenv("MONGO_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast on missing configuration.  Without this check an unset MONGO_URI
# is passed to MongoClient as None (which does not point at the intended
# deployment), and an unset API key only surfaces after the PDF has already
# been downloaded and split.
_missing = [
    name
    for name, value in (
        ("MONGO_URI", mongo_uri),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
    )

client = MongoClient(mongo_uri)
dbName = "langchain_demo"
collectionName = "collection_of_text_blobs"
collection = client[dbName][collectionName]

# Source document: fetched straight from its URL by the loader.
loader = PyPDFLoader("https://arxiv.org/pdf/2303.08774.pdf")
data = loader.load()

# Split the loaded documents into chunks of size 500 with no overlap between
# consecutive chunks; the chunks (not the raw documents) are what get embedded.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(data)

# Embed the chunks and insert them into the collection.
# NOTE(review): disallowed_special=() is presumably there so that chunk text
# containing tokenizer special-token strings does not raise -- confirm against
# the OpenAIEmbeddings documentation.
# NOTE(review): index_name="default" is expected to name a vector search index
# that already exists on the collection -- confirm in the Atlas configuration.
x = MongoDBAtlasVectorSearch.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(openai_api_key=openai_api_key, disallowed_special=()),
    collection=collection,
    index_name="default",
)