# Spaces: Sleeping
# Ingestion script: parse the PDFs under ./documents with LlamaParse, embed
# them with OpenAI, and store the vectors in the Qdrant collection "RAG_Test".
# Everything runs at import time, top to bottom; the resulting objects
# (documents, client, vector_store, storage_context, index, ...) are left as
# module-level names.
import os

import nest_asyncio

# NOTE(review): presumably needed so the parser's async calls can run inside
# an already-running event loop -- confirm.
nest_asyncio.apply()

# Load environment variables (including LLAMA_CLOUD_API_KEY) from .env
# before the service libraries below are imported and configured.
from dotenv import load_dotenv

load_dotenv()

##### LlamaParse / LlamaIndex #####
from llama_parse import LlamaParse
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

##### Qdrant #####
import qdrant_client
from qdrant_client import QdrantClient, models

# Credentials and endpoints; each is None when the variable is not set.
llamaparse_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
qdrant_url = os.environ.get("QDRANT_URL")
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Parse every .pdf in ./documents through LlamaParse, requesting plain text.
parser = LlamaParse(api_key=llamaparse_api_key, result_type="text")
file_extractor = {".pdf": parser}
_reader = SimpleDirectoryReader(
    input_dir="./documents",
    file_extractor=file_extractor,
)
documents = _reader.load_data()

# Register the embedding model and the LLM as the LlamaIndex-wide defaults.
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.embed_model = embed_model

llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)
Settings.llm = llm

# Connect to the Qdrant instance named by QDRANT_URL / QDRANT_API_KEY.
client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

# Manual collection creation, kept disabled as in the original script.
# NOTE(review): this snippet uses size=1536 and the name "RAG_test", while the
# live code below uses "RAG_Test" and text-embedding-3-large (3072 dimensions
# by default per OpenAI's documentation) -- reconcile both before re-enabling.
# client.create_collection(
#     collection_name="RAG_test",
#     vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
# )

# Build the index over the parsed documents, writing vectors to Qdrant.
vector_store = QdrantVectorStore(collection_name="RAG_Test", client=client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    show_progress=True,
)

# Persist the index's storage context using its default settings.
index.storage_context.persist()