# rag-docs-demo / app.py
# Hugging Face Space file header (uploaded by rairo, commit "Update app.py",
# revision ed9810c verified, 3.36 kB) — converted to comments so the module parses.
import google.generativeai as palm
import pandas as pd
import os
import gradio as gr
import io
from langchain.llms import GooglePalm
import pandas as pd
#from yolopandas import pd
from langchain.embeddings import GooglePalmEmbeddings
# a class to create a question answering system based on information retrieval
from langchain.chains import RetrievalQA
# a class for splitting text into fixed-sized chunks with an optional overlay
from langchain.text_splitter import RecursiveCharacterTextSplitter
# a class to create a vector index using FAISS, a library for approximate nearest neighbor search
from langchain.vectorstores import FAISS
# a class for loading PDF documents from a directory
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.schema.vectorstore import VectorStoreRetriever
from dotenv import load_dotenv
load_dotenv()
palm.configure(api_key=os.environ['PALM'])
models = [m for m in palm.list_models(
) if 'generateText' in m.supported_generation_methods]
model = models[0].name
print(model)
def get_pdf_text(pdf_docs):
text=""
for pdf in pdf_docs:
pdf_reader= PdfReader(pdf)
for page in pdf_reader.pages:
text+= page.extract_text()
return text
# load PDF files from a directory
loader = PyPDFDirectoryLoader("documents/")
data = loader.load()
# print the loaded data, which is a list of tuples (file name, text extracted from the PDF)
#print(data)
# split the extracted data into text chunks using the text_splitter, which splits the text based on the specified number of characters and overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)
text_chunks = text_splitter.split_documents(data)
# print the number of chunks obtained
#print(len(text_chunks))
embeddings = GooglePalmEmbeddings(google_api_key=os.environ['PALM'])
# create embeddings for each text chunk using the FAISS class, which creates a vector index using FAISS and allows efficient searches between vectors
vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
#print(type(vector_store))
def ask_pdfs(user_question):
load_dotenv()
llm = GooglePalm(temperature=0, google_api_key=os.environ['PALM'])
# Create a question answering system based on information retrieval using the RetrievalQA class, which takes as input a neural language model, a chain type and a retriever (an object that allows you to retrieve the most relevant chunks of text for a query)
retriever = VectorStoreRetriever(vectorstore=vector_store)
#qa = RetrievalQA.from_llm(llm=llm, retriever=retriever, return_source_documents=True)
qa2 = RetrievalQA.from_llm(llm=llm, retriever=retriever)
#response =qa(user_question)
res = qa2.run(user_question)
#print("Response:",response)
#ans = str(res + "Source document:"+ str(response['source_documents']))
return res
'''
def questiondocument(user_question):
load_dotenv()
llm = GooglePalm(temperature=0, google_api_key=os.environ['PALM'])
response = llm(user_question)
return response
'''
demo = gr.Interface(
fn=ask_pdfs,
inputs=["text"],
outputs=["text"],
title="UUW QuickHelper Bot",
)
demo.launch(share=True)