"""Maintain Pinecone-backed LlamaIndex "brains" and their MongoDB file registry.

``updateBrain`` is the entry point: it loads the given files, replaces any
vectors previously stored for them in the brain's Pinecone index, and records
the uploaded filenames in the ``nbrain.files`` MongoDB collection.
"""
import logging
import os
import sys
from datetime import datetime, timezone

import openai  # not referenced in this module; import kept as in the original
import pinecone
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import PineconeVectorStore
from pymongo.mongo_client import MongoClient

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig above already attaches a stdout handler to the
# root logger, so this second handler makes every record print twice. Left
# unchanged because it is a module-level side effect others may rely on.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Credentials are read once at import time; a missing variable raises KeyError.
pinecone_key = os.environ['PINECONE_KEY']
globalkey = os.environ["global-key"]
chriskey = os.environ["chris_key"]
mongo_key = os.environ["MONGO_KEY"]

# Brains whose Pinecone index is accessed with ``chriskey``.
chrisBrains = ["watchman"]

_PINECONE_ENVIRONMENT = "us-west4-gcp"


def _init_pinecone(brainName):
    """Initialise the Pinecone client with the API key that owns *brainName*."""
    if brainName == "global-tekmed":
        api_key = globalkey
    elif brainName in chrisBrains:
        api_key = chriskey
    else:
        api_key = pinecone_key
    pinecone.init(api_key=api_key, environment=_PINECONE_ENVIRONMENT)


def isBrainFound(brainName):
    """Return True if a Pinecone index named like *brainName* exists.

    Side effect: (re)initialises the global Pinecone client with the key
    selected for *brainName*. The key is chosen from the name as given; the
    comparison against the active indexes uses the lower-cased name.
    """
    _init_pinecone(brainName)
    active_indexes = pinecone.list_indexes()
    print(active_indexes)
    return brainName.lower() in active_indexes


def add_file(collection, username, filename):
    """Insert a registry document for *filename* under brain *username*.

    Args:
        collection: Object exposing ``insert_one`` (a pymongo collection
            in this module).
        username: Stored under the ``"brain"`` key.
        filename: Stored under the ``"filename"`` key.
    """
    document = {
        "brain": username,
        "filename": filename,
        "namespace": None,
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        "timestamp": datetime.now(timezone.utc),
    }
    collection.insert_one(document)


def delete_file(collection, username, filename):
    """Delete one registry document matching brain *username* and *filename*."""
    query = {
        "brain": username,
        "filename": filename,
    }
    collection.delete_one(query)


def updateBrain(files, brainName):
    """Index *files* into the brain *brainName* and record them in MongoDB.

    Args:
        files: Sized iterable of objects with a ``.name`` attribute holding a
            file path readable by ``SimpleDirectoryReader``.
        brainName: Name of the target brain / Pinecone index.

    Returns:
        A status string:
        ``"updated"`` on success (and always for the unchain3d demo brain);
        ``"brain not found"`` when no matching Pinecone index exists;
        ``"Please Wait! Files are uploading, Try again Later!"`` when a file
        is still empty on disk; or a "Brain Updated! Below files are
        corrupt/unformatted..." message listing files that failed.
    """
    # The demo brain is never modified.
    if brainName in ("unchain3d-demo", "Unchain3d-demo"):
        return "updated"
    # isBrainFound also initialises the Pinecone client with the right key,
    # so no second pinecone.init call is needed below.
    if not isBrainFound(brainName):
        return "brain not found"

    print(len(files))
    print("Updating brain")
    print(brainName)

    documents = []
    corrupt = []
    newfiles = []
    for file_count, file in enumerate(files, start=1):
        filename = os.path.basename(file.name)
        loader = SimpleDirectoryReader(input_files=[file.name])
        try:
            pages = loader.load_data()
            # Every document from a file shares the filename as doc_id so
            # the whole file can later be removed with one delete_ref_doc.
            for page_number, page in enumerate(pages, start=1):
                page.doc_id = filename
                page.extra_info = {"filename": filename, "pageno": page_number}
        except Exception as e:
            print(e)
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(filename)
        else:
            # Stage pages per file: a file that failed is never half-indexed.
            documents.extend(pages)
            newfiles.append(filename)
        print(file_count)

    # Open the index under the same lower-cased name isBrainFound matched.
    pindex = pinecone.Index(brainName.lower())
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context
    )

    client = MongoClient(mongo_key)
    try:
        collection = client['nbrain']['files']

        # Drop any earlier version of each file before re-inserting it.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        uploaded_files = set()
        inserted = 0
        for doc in documents:
            try:
                index.insert(doc)
                # Register each file once, on its first inserted document.
                if doc.doc_id not in uploaded_files:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    uploaded_files.add(doc.doc_id)
                inserted += 1
                print(inserted)
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : " + str(e))
    finally:
        # Release the MongoDB connection pool even if indexing fails.
        client.close()

    print("Brain Updated: " + brainName)
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    print(corrupt)
    if corrupt:
        message = (
            "Brain Updated! Below files are corrupt/unformatted, "
            "and not added to the brain. " + str(corrupt)
        )
        print(message)
        return message
    return "updated"