Spaces:
Runtime error
Runtime error
File size: 2,154 Bytes
7d564bd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
# Provide a fallback OpenAI API key without clobbering a real one.
# `setdefault` only writes the placeholder when OPENAI_API_KEY is not already
# present in the environment, so a key exported in the shell (or injected as a
# deployment secret) is used as-is instead of being overwritten.
# Replace the placeholder for local experiments only; never commit a real key.
os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key")
def load_document(file_path):
    """Read the PDF at ``file_path`` and return the documents parsed from it."""
    return PyPDFLoader(file_path).load()
def setup_vector_store(documents):
    """Embed ``documents`` with OpenAI embeddings and index them in Chroma."""
    embedder = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embedder)
def setup_retrieval_chain(vector_store):
    """Build a conversational retrieval chain over ``vector_store``.

    Args:
        vector_store: Vector store whose ``as_retriever()`` supplies the
            retriever used to fetch context for each question.

    Returns:
        A ``ConversationalRetrievalChain`` that keeps the running dialogue in
        a ``ConversationBufferMemory`` under the ``"chat_history"`` key.
    """
    # Imported locally so this function brings its own dependency into scope.
    from langchain_community.chat_models import ChatOpenAI

    # gpt-4 is a chat-completions model, so it is driven through the chat
    # model wrapper. The completion-style ``OpenAI`` class only emits a
    # warning and substitutes a legacy chat shim when given a gpt-4 name.
    llm = ChatOpenAI(model_name="gpt-4")

    # return_messages=True stores history as message objects rather than one
    # concatenated string.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
def query_document(retrieval_chain):
    """Run an interactive CLI loop that answers questions about the document.

    Reads questions from standard input until the user types ``exit`` (any
    letter case, surrounding whitespace ignored) or the input stream ends,
    and prints the ``"answer"`` field of each chain response. Blank lines are
    skipped instead of being sent to the chain.

    Args:
        retrieval_chain: Object exposing ``invoke`` (preferred) or a plain
            callable. It is given ``{"question": <text>}`` and must return a
            mapping that contains an ``"answer"`` key.
    """
    # Calling a chain object directly is deprecated in favour of ``invoke``;
    # fall back to calling the object itself so plain callables keep working.
    ask = getattr(retrieval_chain, "invoke", retrieval_chain)

    print("Interactive Document Query Tool")
    print("Type 'exit' to stop the session.\n")
    while True:
        try:
            user_query = input("Enter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C or exhausted piped input: finish the session
            # cleanly instead of dumping a traceback at the user.
            print()
            user_query = "exit"

        if user_query.lower() == "exit":
            print("Exiting the query tool. Goodbye!")
            break
        if not user_query:
            # Nothing was typed; avoid a pointless (and billable) LLM call.
            continue

        response = ask({"question": user_query})
        print("Answer:", response["answer"])
        print("\n")
def main():
    """Prompt for a PDF, index its contents, and start the question loop."""
    # Ask where the PDF lives and parse it.
    pdf_path = input("Enter the path to your PDF document: ")
    pages = load_document(pdf_path)
    print("DOC Loaded")

    # Index the parsed documents, then wrap the index in a conversational chain.
    chain = setup_retrieval_chain(setup_vector_store(pages))

    # Hand control to the interactive prompt.
    query_document(chain)
# Start the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|