# rag-docs-demo / app.py
# Hugging Face Space file header (uploaded by rairo, commit "Update app.py",
# revision ed9810c verified, 3.36 kB) — converted to comments so the module parses.
import google.generativeai as palm
import pandas as pd
import os
import gradio as gr
import io
from langchain.llms import GooglePalm
import pandas as pd
#from yolopandas import pd
from langchain.embeddings import GooglePalmEmbeddings
# a class to create a question answering system based on information retrieval
from langchain.chains import RetrievalQA
# a class for splitting text into fixed-sized chunks with an optional overlay
from langchain.text_splitter import RecursiveCharacterTextSplitter
# a class to create a vector index using FAISS, a library for approximate nearest neighbor search
from langchain.vectorstores import FAISS
# a class for loading PDF documents from a directory
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.schema.vectorstore import VectorStoreRetriever
from dotenv import load_dotenv
load_dotenv()
palm.configure(api_key=os.environ['PALM'])
models = [m for m in palm.list_models(
) if 'generateText' in m.supported_generation_methods]
model = models[0].name
print(model)
def get_pdf_text(pdf_docs):
text=""
for pdf in pdf_docs:
pdf_reader= PdfReader(pdf)
for page in pdf_reader.pages:
text+= page.extract_text()
return text
# load PDF files from a directory
loader = PyPDFDirectoryLoader("documents/")
data = loader.load()
# print the loaded data, which is a list of tuples (file name, text extracted from the PDF)
#print(data)
# split the extracted data into text chunks using the text_splitter, which splits the text based on the specified number of characters and overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)
text_chunks = text_splitter.split_documents(data)
# print the number of chunks obtained
#print(len(text_chunks))
embeddings = GooglePalmEmbeddings(google_api_key=os.environ['PALM'])
# create embeddings for each text chunk using the FAISS class, which creates a vector index using FAISS and allows efficient searches between vectors
vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
#print(type(vector_store))
def ask_pdfs(user_question):
load_dotenv()
llm = GooglePalm(temperature=0, google_api_key=os.environ['PALM'])
# Create a question answering system based on information retrieval using the RetrievalQA class, which takes as input a neural language model, a chain type and a retriever (an object that allows you to retrieve the most relevant chunks of text for a query)
retriever = VectorStoreRetriever(vectorstore=vector_store)
#qa = RetrievalQA.from_llm(llm=llm, retriever=retriever, return_source_documents=True)
qa2 = RetrievalQA.from_llm(llm=llm, retriever=retriever)
#response =qa(user_question)
res = qa2.run(user_question)
#print("Response:",response)
#ans = str(res + "Source document:"+ str(response['source_documents']))
return res
'''
def questiondocument(user_question):
load_dotenv()
llm = GooglePalm(temperature=0, google_api_key=os.environ['PALM'])
response = llm(user_question)
return response
'''
demo = gr.Interface(
fn=ask_pdfs,
inputs=["text"],
outputs=["text"],
title="UUW QuickHelper Bot",
)
demo.launch(share=True)