# doc-try / app.py — interactive command-line tool for asking questions
# about a PDF document using LangChain retrieval over OpenAI embeddings.
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import Chroma
# Configure the OpenAI API key. Use setdefault so a real key supplied by the
# deployment environment (e.g. a Space secret) is never clobbered by the
# placeholder below; the placeholder only applies when no key is set at all.
os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key")
def load_document(file_path):
    """Parse the PDF at *file_path* and return its pages as documents."""
    pdf_loader = PyPDFLoader(file_path)
    return pdf_loader.load()
def setup_vector_store(documents):
    """Embed *documents* with OpenAI embeddings and index them in Chroma."""
    embedder = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embedder)
def setup_retrieval_chain(vector_store):
    """Build a conversational retrieval chain over *vector_store*.

    Uses ``ChatOpenAI`` because "gpt-4" is a chat-completions model: the
    legacy completions wrapper ``OpenAI`` rejects chat-only model names at
    request time. The buffer memory keeps the running chat history so
    follow-up questions can reference earlier answers.
    """
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    retrieval_chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-4"),
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
    return retrieval_chain
def query_document(retrieval_chain):
    """Run an interactive CLI loop: read questions, print the chain's answers.

    Exits on the command "exit" (case-insensitive) or when stdin is closed
    (EOF), so the tool terminates cleanly under piped / non-interactive
    input instead of crashing with EOFError. Blank input lines are skipped
    rather than being sent to the LLM.
    """
    print("Interactive Document Query Tool")
    print("Type 'exit' to stop the session.\n")
    while True:
        try:
            user_query = input("Enter your question: ").strip()
        except EOFError:
            # stdin exhausted (e.g. piped input) — exit gracefully.
            print("\nExiting the query tool. Goodbye!")
            break
        if not user_query:
            # Skip empty lines instead of burning an API call on them.
            continue
        if user_query.lower() == "exit":
            print("Exiting the query tool. Goodbye!")
            break
        response = retrieval_chain({"question": user_query})
        print("Answer:", response['answer'])
        print("\n")
def main():
    """Entry point: load a PDF, index it, then start the query loop."""
    pdf_path = input("Enter the path to your PDF document: ")

    # Parse the document into pages.
    docs = load_document(pdf_path)
    print("DOC Loaded")

    # Embed and index the pages, then wire up the conversational chain.
    store = setup_vector_store(docs)
    chain = setup_retrieval_chain(store)

    # Hand control to the interactive CLI.
    query_document(chain)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()