Spaces:
Sleeping
Sleeping
| from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext | |
| from llama_index.vector_stores import PineconeVectorStore | |
| from llama_index.storage.storage_context import StorageContext | |
| import pinecone | |
| import logging | |
| import sys | |
| import os | |
| import openai | |
| from pymongo.mongo_client import MongoClient | |
| from datetime import datetime | |
# Send log records to stdout at INFO level. basicConfig() already attaches a
# stdout StreamHandler to the root logger, so no second handler is registered:
# adding one as well makes every record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Credentials are read from the environment at import time; a missing variable
# raises KeyError immediately.
pinecone_key = os.environ["PINECONE_KEY"]  # default Pinecone API key
globalkey = os.environ["global-key"]  # Pinecone key used for the "global-tekmed" brain
chriskey = os.environ["chris_key"]  # Pinecone key used for brains in chrisBrains
mongo_key = os.environ["MONGO_KEY"]  # passed to MongoClient when recording files

# Brain names whose Pinecone index is accessed with `chriskey`.
chrisBrains = ["watchman"]
def isBrainFound(brainName):
    """Report whether a Pinecone index exists for the given brain.

    The Pinecone client is initialised with the API key that belongs to the
    brain: "global-tekmed" uses ``globalkey``, names listed in
    ``chrisBrains`` use ``chriskey``, and everything else uses
    ``pinecone_key``. The indexes visible to that key are printed and the
    lower-cased brain name is looked up among them.

    Args:
        brainName: Name of the brain / Pinecone index to look for.

    Returns:
        True if the lower-cased name is one of the active indexes, else False.
    """
    # Key selection compares the name exactly as passed in; only the final
    # membership test below is done on the lower-cased name.
    if brainName == "global-tekmed":
        api_key = globalkey
    elif brainName in chrisBrains:
        api_key = chriskey
    else:
        api_key = pinecone_key
    pinecone.init(api_key=api_key, environment="us-west4-gcp")

    available = pinecone.list_indexes()
    print(available)
    return brainName.lower() in available
def add_file(collection, username, filename):
    """Insert a record describing an uploaded file into a collection.

    Args:
        collection: Object exposing ``insert_one(document)``; ``updateBrain``
            passes a PyMongo collection.
        username: Value stored under the "brain" key (``updateBrain`` passes
            the brain name here).
        filename: Name of the file being recorded.

    Returns:
        None. The inserted document has the keys "brain", "filename",
        "namespace" (always None) and "timestamp" (current UTC time).
    """
    # Local import: the module only imports the `datetime` class itself.
    from datetime import timezone

    document = {
        "brain": username,
        "filename": filename,
        "namespace": None,
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # it denotes the same instant.
        "timestamp": datetime.now(timezone.utc),
    }
    collection.insert_one(document)
def delete_file(collection, username, filename):
    """Delete one file record matching the given brain and file name.

    Args:
        collection: Object exposing ``delete_one(query)``; ``updateBrain``
            passes a PyMongo collection.
        username: Value matched against the "brain" key.
        filename: Value matched against the "filename" key.

    Returns:
        None.
    """
    collection.delete_one({"brain": username, "filename": filename})
def updateBrain(files, brainName):
    """Load the given files and (re)index them into the brain's Pinecone index.

    Each file is read with ``SimpleDirectoryReader``. Every document it
    yields is tagged with the file's base name as ``doc_id`` and with
    ``{"filename", "pageno"}`` metadata. Any previous entries for
    successfully loaded files are deleted from the vector index and from the
    Mongo ``nbrain.files`` collection, then the fresh documents are inserted
    and each file is recorded once via ``add_file``.

    Args:
        files: Iterable of objects with a ``name`` attribute holding the path
            of the file to ingest.
        brainName: Name of the brain; also used as the Pinecone index name.

    Returns:
        - "updated" on success (and immediately for the demo brain names).
        - "brain not found" if ``isBrainFound`` reports no such index.
        - "Please Wait! Files are uploading, Try again Later!" if a file is
          still empty when it is read.
        - A "Brain Updated! ..." message listing the files that could not be
          loaded or inserted, when there are any.
    """
    # The demo brain is never modified.
    if brainName in ("unchain3d-demo", "Unchain3d-demo"):
        return "updated"
    if not isBrainFound(brainName):
        return "brain not found"

    print(len(files))
    print("Updating brain")
    print(brainName)

    # Select the API key that owns this brain's index.
    if brainName == "global-tekmed":
        pinecone.init(globalkey, environment="us-west4-gcp")
    elif brainName in chrisBrains:
        pinecone.init(chriskey, environment="us-west4-gcp")
    else:
        pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")

    documents = []  # every loaded document, across all files
    corrupt = []    # base names of files that failed to load or insert
    newfiles = []   # base names of files that loaded successfully

    for processed, file in enumerate(files, start=1):
        loader = SimpleDirectoryReader(input_files=[file.name])
        basename = os.path.basename(file.name)
        try:
            # All documents of one file share the file name as doc_id so they
            # can later be deleted together through delete_ref_doc().
            for pageno, doc in enumerate(loader.load_data(), start=1):
                doc.doc_id = basename
                doc.extra_info = {"filename": basename, "pageno": pageno}
                documents.append(doc)
            newfiles.append(basename)
        except Exception as e:
            print(e)
            # An empty file means the upload has not finished yet; abort the
            # whole update so the caller can retry.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(basename)
        print(processed)

    pindex = pinecone.Index(brainName)
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        # Stats are informational only; failure must not stop the update.
        print(e)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    # Build the index wrapper without documents; they are inserted one by one
    # below so a single failure does not abort the rest.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context
    )

    client = MongoClient(mongo_key)
    try:
        collection = client["nbrain"]["files"]

        # Drop earlier versions of the files that are about to be re-added.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        recorded = set()  # doc_ids already written to Mongo in this run
        inserted = 0
        for doc in documents:
            try:
                index.insert(doc)
                if doc.doc_id not in recorded:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    recorded.add(doc.doc_id)
                inserted += 1
                print(inserted)
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : " + str(e))
    finally:
        # Always release the Mongo connection, even if indexing raised.
        client.close()

    print("Brain Updated: " + brainName)
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    print(corrupt)
    if corrupt:
        print("Brain Updated! Below files are corrupt/unformatted, and not added to the brain. " + str(corrupt))
        return (
            "Brain Updated!\n"
            "Below files are corrupt/unformatted, and not added to the brain.\n"
            + str(corrupt)
        )
    return "updated"