# NOTE(review): removed Hugging Face Spaces page artifacts ("Spaces:" and two
# "Runtime error" status lines) — they were scraped page chrome, not program text.
"""Streamlit RAG app: answer questions about an uploaded PDF.

Pipeline: PyPDF2 text extraction -> RecursiveCharacterTextSplitter ->
Google Generative AI embeddings -> FAISS retriever -> `stuff` QA chain
driven by a Gemini chat model.
"""
import io
import os

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma  # kept for the commented-out alternative below
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# st.title("Chat Your PDFs") # Updated title
st.set_page_config(layout="centered")
st.markdown(
    "<h1 style='font-size:24px;'>RAG with LangChain & GenAI: Any PDF</h1>",
    unsafe_allow_html=True,
)

# Load environment variables from .env, then fail fast if the key is absent.
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    st.warning("API key not found. Please set the google_api_key environment variable.")
    st.stop()

# File upload widget.
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

# Single consolidated prompt. The original concatenated three fragments, which
# duplicated the Context/Question/Answer section (once mid-template with a stray
# "?" after {context}, once at the end); here each part appears exactly once.
prompt_template = """
Answer the question as detailed as possible from the provided context,
make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context",
don't provide the wrong answer

--------------------------------------------------
Prompt Suggestions:
1. Summarize the main idea of the context.
2. Provide a detailed explanation of the key concepts mentioned in the context.
3. Identify any supporting evidence or examples that can be used to answer the question.
4. Analyze any trends or patterns mentioned in the context that are relevant to the question.
5. Compare and contrast different aspects or viewpoints presented in the context.
6. Discuss any implications or consequences of the information provided in the context.
7. Evaluate the reliability or credibility of the information presented in the context.
8. Offer recommendations or suggestions based on the information provided.
9. Predict potential future developments or outcomes based on the context.
10. Provide additional context or background information relevant to the question.
11. Explain any technical terms or jargon used in the context.
12. Interpret any charts, graphs, or visual aids included in the context.
13. Discuss any limitations or caveats that should be considered when answering the question.
14. Address any potential biases or assumptions present in the context.
15. Offer alternative perspectives or interpretations of the information provided.
16. Discuss any ethical considerations or implications raised by the context.
17. Analyze any cause-and-effect relationships mentioned in the context.
18. Identify any unanswered questions or areas for further investigation.
19. Clarify any ambiguities or inconsistencies in the context.
20. Provide examples or case studies that illustrate the concepts discussed in the context.
--------------------------------------------------
Context:\n{context}\n
Question:\n{question}\n
Answer:
"""


def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the text of every page, joined by blank lines.

    `extract_text()` returns None for pages with no text layer (e.g. scans);
    substitute "" so the join never raises TypeError.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    return "\n\n".join((page.extract_text() or "") for page in reader.pages)


@st.cache_resource(show_spinner=False)
def _build_retriever(pdf_bytes: bytes):
    """Split, embed, and index the PDF; return a FAISS retriever.

    Cached on the file bytes so the slow, billable embedding step runs once
    per uploaded file instead of on every Streamlit rerun (each widget
    interaction re-executes the whole script).
    """
    text = _extract_pdf_text(pdf_bytes)
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # return Chroma.from_texts(chunks, embeddings).as_retriever()
    return FAISS.from_texts(chunks, embeddings).as_retriever()


if uploaded_file is not None:
    st.text("PDF File Uploaded Successfully!")
    vector_index = _build_retriever(uploaded_file.read())

    user_question = st.text_input("Enter your Question below:", "")
    if st.button("Get Answer"):
        if user_question:
            with st.spinner("Processing..."):
                # Retrieve the chunks most relevant to the question, then run
                # the "stuff" chain (all retrieved docs packed into one prompt).
                docs = vector_index.get_relevant_documents(user_question)
                prompt = PromptTemplate(
                    template=prompt_template,
                    input_variables=["context", "question"],
                )
                model = ChatGoogleGenerativeAI(
                    model="gemini-pro", temperature=0.3, api_key=google_api_key
                )
                chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                response = chain(
                    {"input_documents": docs, "question": user_question},
                    return_only_outputs=True,
                )
            st.subheader("Answer:")
            st.write(response["output_text"])
        else:
            st.warning("Please enter a question.")