File size: 2,154 Bytes
7d564bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Provide a placeholder OpenAI API key only when none is configured.
# setdefault leaves an OPENAI_API_KEY that is already exported in the
# environment untouched, so a real key is never clobbered by the placeholder.
# Replace the placeholder (or export the variable) before running.
os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key")

def load_document(file_path):
    """Read the PDF at ``file_path`` and return the loaded documents.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(file_path).load()`` produces.
    """
    return PyPDFLoader(file_path).load()

def setup_vector_store(documents):
    """Embed ``documents`` with OpenAI embeddings and index them in Chroma.

    Args:
        documents: The loaded documents to embed and store.

    Returns:
        The vector store built by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(documents, OpenAIEmbeddings())

def setup_retrieval_chain(vector_store):
    """Build a conversational retrieval chain with chat memory.

    Args:
        vector_store: Store whose ``as_retriever()`` supplies the retriever
            used to fetch context for each question.

    Returns:
        A ``ConversationalRetrievalChain`` backed by the ``gpt-4`` model, the
        store's retriever, and a ``ConversationBufferMemory`` that keeps the
        conversation under the ``"chat_history"`` key.
    """
    # Imported locally so this function carries its own dependency.
    # "gpt-4" is a chat model, so it is built with the chat-model wrapper
    # rather than the completion-style ``OpenAI`` LLM class.
    from langchain_community.chat_models import ChatOpenAI

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-4"),
        retriever=vector_store.as_retriever(),
        memory=memory,
    )

def query_document(retrieval_chain):
    """Run an interactive prompt loop that answers questions via the chain.

    Questions are read from standard input until the user types ``exit``
    (case-insensitive, surrounding whitespace ignored) or input ends
    (EOF or Ctrl-C at the prompt). Blank lines are skipped instead of being
    sent to the chain.

    Args:
        retrieval_chain: Either an object exposing ``invoke`` or a plain
            callable. It receives ``{"question": <text>}`` and must return a
            mapping containing an ``"answer"`` key.
    """
    # Prefer ``invoke`` when the chain offers it; otherwise call the object
    # directly so plain callables keep working.
    ask = getattr(retrieval_chain, "invoke", retrieval_chain)

    print("Interactive Document Query Tool")
    print("Type 'exit' to stop the session.\n")
    while True:
        try:
            user_query = input("Enter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: leave cleanly instead of
            # surfacing a traceback. The newline ends the pending prompt line.
            print("\nExiting the query tool. Goodbye!")
            break
        if not user_query:
            # Nothing was asked; do not spend a model call on an empty query.
            continue
        if user_query.lower() == "exit":
            print("Exiting the query tool. Goodbye!")
            break
        response = ask({"question": user_query})
        print("Answer:", response['answer'])
        print("\n")

def main():
    """Prompt for a PDF path, index the document, and start the query loop."""
    pdf_path = input("Enter the path to your PDF document: ")
    pages = load_document(pdf_path)
    print("DOC Loaded")

    # Index the loaded pages, wrap the index in a conversational chain,
    # then hand control to the interactive prompt.
    chain = setup_retrieval_chain(setup_vector_store(pages))
    query_document(chain)

# Run the interactive tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()