Spaces:
Runtime error
Runtime error
| from typing import List | |
| from pinecone import Pinecone, ServerlessSpec | |
| from llama_index.vector_stores.pinecone import PineconeVectorStore | |
| from dotenv import load_dotenv | |
| from llama_index.core import ( | |
| SimpleDirectoryReader, | |
| Document, | |
| VectorStoreIndex, | |
| StorageContext, | |
| ) | |
| from huggingface_hub import HfFileSystem | |
| import os | |
# Load variables from a .env file (if one is present) into the process
# environment, so PINECONE_API_KEY can be supplied that way.
load_dotenv()

# Pinecone vector database: target index name and the API client.
pc_index_name = "llama-integration-pinecone"
# pc_index_name = "openai-embeddings"
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Listing of existing indexes, fetched once when this module is imported.
pc_indexes = pc.list_indexes()
def index_exists(index_name, indexes=None):
    """Report whether an index called ``index_name`` appears in a listing.

    Args:
        index_name: Name of the index to look for.
        indexes: Optional iterable of index descriptions, each supporting
            ``entry["name"]``. When omitted, the module-level ``pc_indexes``
            listing is used, so existing single-argument calls behave
            exactly as before.

    Returns:
        True if any entry's ``"name"`` equals ``index_name``, otherwise False.
    """
    if indexes is None:
        # pc_indexes is fetched once at import time and is not refreshed
        # here; pass a fresh listing explicitly if that matters.
        indexes = pc_indexes
    return any(index["name"] == index_name for index in indexes)
# First run only: create the serverless index when the listing does not
# already contain it.
# NOTE(review): dimension=1536 presumably matches the embedding model
# configured elsewhere -- confirm before changing either.
if not index_exists(pc_index_name):
    pc.create_index(
        name=pc_index_name,
        spec=ServerlessSpec(region="us-east-1", cloud="aws"),
        dimension=1536,
    )

# Handle to the index used by the helpers below.
pinecone_index = pc.Index(pc_index_name)

# Disabled maintenance snippets, kept for manual use:
# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))

# Directory that get_pinecone_index() reads source files from.
SAVE_DIR = "uploaded_files"
def _namespace_exists(namespace: str):
    """Return whether ``namespace`` is among the index's reported namespaces.

    The lookup is made against the ``"namespaces"`` entry of
    ``pinecone_index.describe_index_stats()``.
    """
    stats = pinecone_index.describe_index_stats()
    known_namespaces = stats["namespaces"]
    return namespace in known_namespaces
def get_pinecone_index(filename: str) -> VectorStoreIndex:
    """Return a ``VectorStoreIndex`` backed by the Pinecone namespace for a file.

    The namespace is ``filename`` with every "." and " " replaced by "_".
    If that namespace already exists in the Pinecone index, the returned
    index is built directly on the vector store. Otherwise the file
    ``SAVE_DIR/filename`` is read and its documents are indexed through a
    storage context that wraps the same vector store.

    Args:
        filename: Name of a file inside ``SAVE_DIR``.

    Returns:
        The loaded or newly built ``VectorStoreIndex``.
    """
    namespace = filename.replace(".", "_").replace(" ", "_")
    vector_store = PineconeVectorStore(
        namespace=namespace,
        pinecone_index=pinecone_index,
    )

    # Namespace already present: build the index on the store, no file read.
    if _namespace_exists(namespace=namespace):
        print(f"Namespace {namespace} exists.")
        return VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # New namespace: read the file and index its documents into the store.
    source_path = f"{SAVE_DIR}/{filename}"
    documents = SimpleDirectoryReader(input_files=[source_path]).load_data(
        show_progress=True
    )
    context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        documents=documents,
        show_progress=True,
        storage_context=context,
    )