pdf_summ / app.py
lokesh2002's picture
Update app.py
fdafa82 verified
import os
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.llms import OpenAI
def get_text(pdf) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf: The PDF source handed straight to ``PdfReader`` (``main``
            passes the file object returned by the Streamlit uploader).

    Returns:
        The extracted text of all pages, joined in page order with no
        separator added between pages.
    """
    pdfreader = PdfReader(pdf)
    # Join once instead of growing a string with ``+=`` inside the loop,
    # which re-copies the accumulated text for every page.
    return ''.join(page.extract_text() for page in pdfreader.pages)
def get_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    The text is split on newlines and regrouped by a
    ``CharacterTextSplitter`` configured for a chunk size of 1000 and an
    overlap of 200, both measured with ``len``.

    Args:
        text: The full document text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": '\n',
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
def main():
    """Run the Streamlit "Query Your PDF" page.

    Flow:
        1. Load environment variables (including a local ``.env`` file).
        2. Render the page and read the user's question.
        3. If a PDF is uploaded in the sidebar, extract its text, split it
           into chunks and index the chunks in a FAISS vector store.
        4. If a question was entered and an index exists, retrieve the most
           similar chunks and answer the question with a "stuff" QA chain.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable so that no credential is stored in the source file.
    """
    load_dotenv()

    # configure streamlit
    st.set_page_config(page_title="Query Your PDF", page_icon=':books:')
    st.title("Query Your PDF")

    # Secrets must come from the environment / .env file, never from a
    # literal in the code base.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        st.error(
            "OPENAI_API_KEY is not set. Add it to your environment or to a "
            ".env file and reload the page."
        )
        return

    query = st.text_input("Ask your query about the pdf", value=None)

    # Stays None until a PDF with extractable text has been indexed, so the
    # query branch below can tell whether there is anything to search.
    database = None
    with st.sidebar:
        st.subheader("Upload your PDF here")
        pdf_file = st.file_uploader("Upload", type=['pdf'])
        if pdf_file:
            chunks = get_chunks(get_text(pdf_file))
            if chunks:
                # Use the same key for embeddings and the LLM.
                embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
                database = FAISS.from_texts(chunks, embedding)
            else:
                st.warning("No extractable text was found in this PDF.")

    response = ""
    if query:
        if database is None:
            # Previously this path referenced an unassigned ``database``.
            st.info("Upload a PDF before asking a question.")
        else:
            llm = OpenAI(openai_api_key=openai_api_key)
            docs = database.similarity_search(query)
            chain = load_qa_chain(
                llm=llm,
                chain_type='stuff',
            )
            response = chain.run(input_documents=docs, question=query)
    st.write(response)
# Call main() only when this file is executed as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
main()