# RTV / app.py
# Solab's picture
# Upload app.py
# ec330f9
# Import standard library modules
import os
# Import third-party modules
from dotenv import load_dotenv, find_dotenv
import google.generativeai as palm
import PyPDF2 # Import PyPDF2 library for reading PDF files
# Import local modules
from langchain import PromptTemplate, LLMChain
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredPDFLoader, UnstructuredURLLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import GooglePalm
from langchain.text_splitter import CharacterTextSplitter
# Import gradio module
import gradio as gr
# Pull variables from a local .env file into the process environment.
load_dotenv(find_dotenv())

# Authenticate the Google Palm SDK using the key from the environment.
# Raises KeyError immediately if GOOGLE_API_KEY is not set.
api_key = os.environ["GOOGLE_API_KEY"]
palm.configure(api_key=api_key)

# Low temperature keeps the model's answers close to deterministic.
llm = GooglePalm()
llm.temperature = 0.1

# Count and report the models that support the generateText method.
models = [
    model
    for model in palm.list_models()
    if "generateText" in model.supported_generation_methods
]
print(f"There are {len(models)} model(s) available.")

# Index factory: Palm embeddings over fixed 1000-char chunks, no overlap.
index_creator = VectorstoreIndexCreator(
    embedding=GooglePalmEmbeddings(),
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
)
# Define a function that takes a PDF file and returns its text content
def pdf_to_text(file_obj):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        file_obj: An object with a ``name`` attribute that is the path of
            a PDF file on disk (e.g. a Gradio file upload).

    Returns:
        str: The text of all pages, concatenated in page order.
    """
    # Use a context manager so the handle is closed even if PyPDF2 raises
    # while parsing (the original leaked the file on error).
    with open(file_obj.name, "rb") as pdf_file:
        # PdfReader / .pages / extract_text() is the current PyPDF2 API;
        # PdfFileReader / numPages / getPage / extractText were deprecated
        # and removed in PyPDF2 3.x.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # extract_text() may yield None/"" for image-only pages; join is
        # linear instead of the original quadratic "+=" accumulation.
        return "".join(
            (page.extract_text() or "") for page in pdf_reader.pages
        )
# Define a function that takes a question and a PDF file and returns the answer from the PDF chain
def answer_question(question, pdf_file):
    """Answer *question* using the contents of the uploaded PDF.

    Args:
        question: The user's natural-language question.
        pdf_file: Uploaded file object whose ``name`` is a path on disk.

    Returns:
        str: The answer produced by the retrieval-QA chain.
    """
    # BUG FIX: UnstructuredPDFLoader expects a *file path*, not raw text.
    # The original extracted the document text with pdf_to_text() and then
    # passed that string where a path belongs, which cannot load any real
    # document. Hand the loader the uploaded file's path instead.
    pdf_loader = UnstructuredPDFLoader(pdf_file.name)
    # Build a vector index over the document's chunks.
    # NOTE(review): this re-indexes the PDF on every question; consider
    # caching the index per file path if latency matters.
    pdf_index = index_creator.from_loaders([pdf_loader])
    # "stuff" chain type: all retrieved chunks are placed into one prompt.
    pdf_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=pdf_index.vectorstore.as_retriever(),
        input_key="question",
    )
    # Use the PDF chain to answer questions about the document.
    return pdf_chain.run(question)
# Prompt template for answering questions as an AI assistant working for
# Raising The Village. Capitalization of "Village" fixed to match the usage
# elsewhere in this file (assistant text and interface description).
template = """
You are an artificial intelligence assistant working for Raising The Village. You are asked to answer questions. The assistant gives helpful, detailed, and polite answers to the user's questions.
{question}
"""
# Wrap the template so a chain can substitute the user's question.
prompt = PromptTemplate(template=template, input_variables=["question"])
# NOTE(review): llm_chain is constructed but never used by answer_question
# in this file; kept in case external code imports it from this module.
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
# Gradio UI: a question textbox plus a PDF upload, wired to answer_question.
interface = gr.Interface(
    fn=answer_question,
    # gr.File is the current component API; the gr.inputs.* namespace was
    # deprecated in Gradio 3.x and removed in 4.x.
    inputs=["text", gr.File(file_types=[".pdf"])],
    outputs="text",
    title="AI Assistant",
    description="Ask me anything about Raising The Village"
)
# share=True publishes a temporary public URL alongside the local server.
interface.launch(share=True)