"""Interactive command-line tool for asking questions about a PDF document.

The PDF is loaded with ``PyPDFLoader``, embedded with OpenAI embeddings into a
Chroma vector store, and queried through a ``ConversationalRetrievalChain``
that is given a ``ConversationBufferMemory`` for the chat history.
"""

import os

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.llms import OpenAI  # noqa: F401  (kept so the name stays importable from this module)
from langchain_community.vectorstores import Chroma

# Fallback API key for OpenAI. ``setdefault`` leaves a key that is already
# present in the environment untouched, so a real key exported in the shell is
# never overwritten by this placeholder.
os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key")


def load_document(file_path):
    """Load and parse the PDF document at *file_path*.

    Args:
        file_path: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        The documents produced by ``PyPDFLoader.load()``.
    """
    loader = PyPDFLoader(file_path)
    return loader.load()


def setup_vector_store(documents):
    """Create embeddings for *documents* and store them in a Chroma database.

    Args:
        documents: Documents as returned by :func:`load_document`.

    Returns:
        The Chroma vector store built from the documents.
    """
    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embeddings)


def setup_retrieval_chain(vector_store):
    """Set up the conversational retrieval chain with memory.

    Args:
        vector_store: Vector store whose retriever supplies context passages.

    Returns:
        A ``ConversationalRetrievalChain`` that records the conversation under
        the ``chat_history`` memory key.
    """
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    # "gpt-4" is a chat model, so it is wrapped with the chat-model class
    # rather than the completion-style ``OpenAI`` LLM class.
    llm = ChatOpenAI(model_name="gpt-4")
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )


def query_document(retrieval_chain):
    """Run the CLI loop that interactively queries the document.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored) or closes/interrupts the input stream. Blank input is
    ignored instead of being sent to the model.

    Args:
        retrieval_chain: Chain as returned by :func:`setup_retrieval_chain`.
    """
    print("Interactive Document Query Tool")
    print("Type 'exit' to stop the session.\n")

    while True:
        try:
            user_query = input("Enter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: leave the session cleanly, without a traceback.
            print("\nExiting the query tool. Goodbye!")
            break

        if not user_query:
            # Nothing to ask; prompt again rather than querying the model.
            continue

        if user_query.lower() == "exit":
            print("Exiting the query tool. Goodbye!")
            break

        response = retrieval_chain.invoke({"question": user_query})
        print("Answer:", response["answer"])
        print("\n")


def main():
    """Prompt for a PDF path, index the document and start the query loop.

    Raises:
        SystemExit: If no path is entered or the PDF yields no documents.
    """
    # Load the document
    file_path = input("Enter the path to your PDF document: ").strip()
    if not file_path:
        raise SystemExit("No file path provided.")

    documents = load_document(file_path)
    if not documents:
        # There is nothing to embed, so stop before building the vector store.
        raise SystemExit(f"No content could be loaded from: {file_path}")
    print("DOC Loaded")

    # Set up the vector store
    vector_store = setup_vector_store(documents)

    # Set up the retrieval chain
    retrieval_chain = setup_retrieval_chain(vector_store)

    # Start querying the document
    query_document(retrieval_chain)


if __name__ == "__main__":
    main()