"""Gradio app: answer questions about an uploaded PDF using Google PaLM.

Pipeline: the uploaded PDF is loaded and split into chunks, indexed into a
vector store with PaLM embeddings, and queried through a RetrievalQA chain.
"""

# Standard library
import os

# Third-party
from dotenv import load_dotenv, find_dotenv
import google.generativeai as palm
import PyPDF2  # PDF text extraction

# LangChain
from langchain import PromptTemplate, LLMChain
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredPDFLoader, UnstructuredURLLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import GooglePalm
from langchain.text_splitter import CharacterTextSplitter

# UI
import gradio as gr

# Load environment variables from a .env file (GOOGLE_API_KEY expected).
load_dotenv(find_dotenv())

# Configure the Google PaLM SDK. Raises KeyError early if the key is missing,
# which is preferable to a confusing auth failure later.
api_key = os.environ["GOOGLE_API_KEY"]
palm.configure(api_key=api_key)

# Low temperature for mostly-deterministic, factual answers.
llm = GooglePalm()
llm.temperature = 0.1

# Report how many available models support text generation.
models = [
    m for m in palm.list_models() if "generateText" in m.supported_generation_methods
]
print(f"There are {len(models)} model(s) available.")

# Index factory: PaLM embeddings over 1000-character, non-overlapping chunks.
index_creator = VectorstoreIndexCreator(
    embedding=GooglePalmEmbeddings(),
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
)


def pdf_to_text(file_obj):
    """Return the concatenated text of every page of a PDF.

    Args:
        file_obj: an object with a ``.name`` attribute holding the PDF's
            path on disk (e.g. a Gradio file upload).

    Returns:
        str: the extracted text of all pages, concatenated in order.
    """
    # ``with`` guarantees the file handle is closed even if extraction fails
    # (the original left the file open on any exception).
    with open(file_obj.name, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        # NOTE(review): PdfFileReader / numPages / getPage / extractText are
        # the legacy PyPDF2 API, removed in PyPDF2 3.x (replaced by
        # PdfReader / len(reader.pages) / reader.pages[i] / extract_text).
        # Confirm the pinned PyPDF2 version before upgrading these calls.
        # ``join`` avoids the quadratic string ``+=`` of the original loop.
        return "".join(
            pdf_reader.getPage(i).extractText()
            for i in range(pdf_reader.numPages)
        )


def answer_question(question, pdf_file):
    """Answer *question* from the content of the uploaded *pdf_file*.

    Args:
        question: the user's natural-language question.
        pdf_file: an uploaded file object whose ``.name`` is a PDF path.

    Returns:
        str: the chain's answer grounded in the PDF's content.
    """
    # Bug fix: UnstructuredPDFLoader takes a *file path*, not raw text.
    # The original extracted the PDF's text with pdf_to_text() and passed
    # that string as the path, which cannot be loaded. Hand the loader the
    # uploaded file's path directly instead.
    pdf_loader = UnstructuredPDFLoader(pdf_file.name)

    # Build the vector index for this document. Note: the index is rebuilt
    # (re-embedded) on every question — acceptable for a demo, but worth
    # caching per-file if usage grows.
    pdf_index = index_creator.from_loaders([pdf_loader])

    # "stuff" chain type: all retrieved chunks are stuffed into one prompt.
    pdf_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=pdf_index.vectorstore.as_retriever(),
        input_key="question",
    )
    return pdf_chain.run(question)


# Persona prompt for direct (non-retrieval) LLM queries.
# NOTE(review): this llm_chain is built but never used by the interface
# below — either wire it in or remove it.
template = """
You are an artificial intelligence assistant working for Raising The village. You are asked to answer questions. The assistant gives helpful, detailed, and polite answers to the user's questions.
{question}
"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

# Web UI: question text box + PDF upload -> answer text.
# NOTE(review): gr.inputs was deprecated in Gradio 3 and removed in Gradio 4;
# the modern equivalent is gr.File(file_types=[".pdf"]). Confirm the pinned
# gradio version before changing.
interface = gr.Interface(
    fn=answer_question,
    inputs=["text", gr.inputs.File(file_types=['.pdf'])],
    outputs="text",
    title="AI Assistant",
    description="Ask me anything about Raising The Village"
)

# share=True exposes a temporary public URL in addition to localhost.
interface.launch(share=True)