Spaces:
Runtime error
Runtime error
| from dotenv import load_dotenv | |
| # langchain libraries | |
| from langchain.document_loaders import DirectoryLoader, TextLoader | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS, Pinecone | |
| import pinecone | |
| import openai | |
| import os | |
# Run python-dotenv's loader first so the lookups below can see any values it
# places in the process environment.
load_dotenv()

# Both lookups yield None when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
def generate_pincone_vector_store(index_name='btc-chat-bot'):
    """Build a Pinecone-backed vector store from the local documents.

    The documents are loaded with ``load_local_documents()``, split with the
    same splitter settings used for the local FAISS store, embedded with
    ``OpenAIEmbeddings`` and uploaded to the Pinecone index ``index_name``.
    The index is created first when it does not exist yet.

    Args:
        index_name: Name of the Pinecone index to create and/or fill.

    Returns:
        The LangChain ``Pinecone`` vector store returned by
        ``Pinecone.from_documents``.
    """
    # Use the key read at module import time instead of leaving it unused.
    # NOTE(review): the Pinecone environment/region is still expected to come
    # from the client's own configuration lookup -- confirm for your setup.
    pinecone.init(api_key=PINECONE_API_KEY)

    # Create the index the caller asked for (not a hard-coded name), and only
    # when it is missing, so repeated runs do not fail on an existing index.
    if index_name not in pinecone.list_indexes():
        # NOTE(review): 1536 presumably matches the dimensionality of the
        # default OpenAI embedding model -- confirm if the model is changed.
        pinecone.create_index(index_name, dimension=1536, metric='cosine')

    # The original referenced undefined `documents` / `embeddings`; build them
    # the same way generate_new_vector_store does.
    documents = load_local_documents()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=0, separators=["\n", "\r\n", "\r", " "])
    documents = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()

    # index_name must be passed by keyword; from_documents only accepts the
    # documents and the embedding object positionally.
    return Pinecone.from_documents(documents, embeddings, index_name=index_name)
def load_local_vector_store(index_name='hr_faiss_index'):
    """Try to load a FAISS index previously saved under ``index_name``.

    Args:
        index_name: Location passed to ``FAISS.load_local``.

    Returns:
        The loaded vector store, or ``None`` when loading raised any
        ``Exception`` (the exception is printed, not re-raised).
    """
    # Built outside the try block, so a failure here propagates to the caller.
    embedder = OpenAIEmbeddings()
    try:
        store = FAISS.load_local(index_name, embedder)
        print("Local VectorDB Found.")
    except Exception as err:
        # Best-effort load: report the problem and signal "not available".
        print(err)
        store = None
    return store
def load_local_documents():
    """Load every document found under ``<cwd>/docs/processed``.

    Returns:
        The non-empty list of documents returned by ``DirectoryLoader.load()``.

    Raises:
        AssertionError: If the loader returned no documents.
    """
    # Let os.path.join insert the separators instead of concatenating '/docs'.
    doc_dir = os.path.join(os.getcwd(), 'docs', 'processed')
    documents = DirectoryLoader(doc_dir).load()

    # Explicit raise instead of `assert` so the check still runs under
    # `python -O`; AssertionError is kept so callers see the same type.
    if not documents:
        raise AssertionError(f"No documents found in {doc_dir}")
    return documents
def generate_new_vector_store(index_name='hr_faiss_index'):
    """Build a FAISS vector store from the local documents and save it.

    Args:
        index_name: Location passed to ``save_local`` for the new index.

    Returns:
        The newly built FAISS vector store.
    """
    print("No Local VectorDB Found. Generating new one...")

    raw_docs = load_local_documents()

    # Split into chunks of at most 1000 characters with no overlap.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        separators=["\n", "\r\n", "\r", " "],
    )
    chunks = splitter.split_documents(raw_docs)

    store = FAISS.from_documents(chunks, OpenAIEmbeddings())
    store.save_local(index_name)
    return store
def get_or_create_vector_store(index_name='hr_faiss_index'):
    """Return the saved vector store for ``index_name``, building it if needed.

    Args:
        index_name: Passed to both the loader and, on a miss, the builder.

    Returns:
        The loaded store when ``load_local_vector_store`` finds one, otherwise
        the store produced by ``generate_new_vector_store``.
    """
    existing = load_local_vector_store(index_name)
    if existing is not None:
        return existing
    # The loader returned None, so fall back to building a fresh index.
    return generate_new_vector_store(index_name)
if __name__ == "__main__":
    # Script entry point: load (or build) the default store and print it.
    store = get_or_create_vector_store()
    print(store)