Spaces:

hunterXdk
/

RagModels

Sleeping

App Files Files Community

RagModels / chatbot.py

hunterXdk

Initial Commit With ❤

50e4be7 verified over 1 year ago

raw

history blame

2.88 kB

	def get_pdf_text(pdf_docs):
	    """Return the text of every page of every PDF, concatenated in order.

	    Args:
	        pdf_docs: Iterable of PDF sources; each item is handed to
	            ``PdfReader`` as-is.

	    Returns:
	        One string containing each page's ``extract_text()`` output, with
	        no separator between pages or documents. Empty string when
	        ``pdf_docs`` is empty.
	    """
	    # Collect the pieces and join once instead of growing a string with
	    # ``+=`` inside nested loops.
	    page_texts = []
	    for pdf in pdf_docs:
	        pdf_reader = PdfReader(pdf)
	        page_texts.extend(page.extract_text() for page in pdf_reader.pages)
	    return "".join(page_texts)

	# chunk_size = 1000, chunk_overlap = 200 (for shorter PDFs)
	def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
	    """Split ``text`` into overlapping chunks for embedding.

	    Args:
	        text: The full document text to split.
	        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
	            ``chunk_size``. Defaults to 10000; a smaller value such as
	            1000 may suit shorter PDFs.
	        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
	            ``chunk_overlap``. Defaults to 1000; e.g. 200 when
	            ``chunk_size`` is 1000.

	    Returns:
	        Whatever ``split_text`` returns for ``text`` (the chunk strings).
	    """
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	    )
	    return text_splitter.split_text(text)

	# Convert the text chunks into a vector store and save it to disk
	def get_vector_store(text_chunks):
	    """Embed ``text_chunks`` and save the FAISS index under ``faiss_index``.

	    Args:
	        text_chunks: The chunk strings passed to ``FAISS.from_texts``.

	    Returns:
	        None. The index is written to the local ``faiss_index`` path and
	        is not returned to the caller.
	    """
	    embedder = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
	    # Build the index and persist it in one step; nothing else uses the
	    # in-memory store here.
	    FAISS.from_texts(texts=text_chunks, embedding=embedder).save_local("faiss_index")

	def get_conversation_chain():
	    """Build the "stuff" question-answering chain used to answer queries.

	    The prompt exposes two variables: ``context`` and ``question``. The
	    second one must be named ``question`` because ``user_input`` invokes
	    the chain with the key ``"question"``; a differently named placeholder
	    leaves the prompt variable unfilled and the chain call fails.

	    Returns:
	        The chain object produced by ``load_qa_chain`` for a
	        ``gemini-pro`` chat model at temperature 0.3.
	    """
	    prompt_template="""Answer the query as detailed as possible from the provided context, make sure to provide all the details, if answeris not in
	the provided context, just say, "Answer is not available in the provided documents", don't provide the wrong answer:\n {context}? \n Query: {question}? \n
	Answer:
	"""

	    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
	    prompt = PromptTemplate(
	        template=prompt_template,
	        input_variables=["context", "question"],
	    )
	    return load_qa_chain(model, chain_type="stuff", prompt=prompt)

	def user_input(user_question):
	    """Answer ``user_question`` from the saved FAISS index and display it.

	    Loads the index stored at ``faiss_index``, runs a similarity search
	    for the question, feeds the matching documents to the QA chain and
	    writes the chain's ``output_text`` to the Streamlit page.

	    Args:
	        user_question: The question text entered by the user.

	    Returns:
	        None. If no index has been saved yet, a warning is shown and the
	        function returns without calling the model.
	    """
	    from pathlib import Path

	    # A question can arrive before any PDF has been processed; without an
	    # index on disk ``FAISS.load_local`` would fail.
	    if not Path("faiss_index").exists():
	        st.warning(
	            "No processed documents found. Upload your PDFs and click "
	            "'Submit & Process' first."
	        )
	        return

	    embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

	    # NOTE(security): allow_dangerous_deserialization=True opts in to
	    # loading pickled data. This is only acceptable while "faiss_index"
	    # is written exclusively by this app (see get_vector_store); never
	    # point it at an index from an untrusted source.
	    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
	    docs = new_db.similarity_search(user_question)

	    chain = get_conversation_chain()

	    response = chain(
	        {"input_documents": docs, "question": user_question},
	        return_only_outputs=True,
	    )

	    # Raw response is echoed to stdout for debugging.
	    print(response)
	    st.write("Reply: ", response["output_text"])

	# Frontend page Processor
	def main():
	    """Render the Streamlit page: a question box and a PDF upload sidebar.

	    A non-empty question is answered via ``user_input``. In the sidebar,
	    "Submit & Process" extracts text from the uploaded PDFs, splits it
	    into chunks and saves the vector index. If nothing was uploaded, or no
	    text could be extracted, a message is shown and indexing is skipped
	    rather than building an index from an empty chunk list.
	    """
	    st.set_page_config(page_title="PDF Chatbot")
	    st.header("PDF Chatbot made with ❤")

	    user_question = st.text_input("Ask a question about your documents:")

	    if user_question:
	        user_input(user_question)

	    with st.sidebar:
	        st.title("Menu:")
	        pdf_docs = st.file_uploader(
	            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
	        if st.button("Submit & Process"):
	            # Nothing uploaded: there is nothing to index.
	            if not pdf_docs:
	                st.warning("Please upload at least one PDF before processing.")
	                return
	            with st.spinner("Ruko Padh raha hu..."):
	                raw_text = get_pdf_text(pdf_docs)
	                text_chunks = get_text_chunks(raw_text)
	                # PDFs without extractable text produce no chunks; skip
	                # indexing instead of passing an empty list downstream.
	                if not text_chunks:
	                    st.error("No readable text was found in the uploaded PDFs.")
	                    return
	                get_vector_store(text_chunks)
	                st.success("Saare documents padh liya. Ab swaal pucho 😤")


	# Start the app only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	main()