"""Streamlit app that answers questions about an uploaded PDF.

The PDF text is extracted, split into overlapping chunks, embedded with
OpenAI embeddings into an in-memory FAISS index, and the chunks most similar
to the user's query are passed to an OpenAI LLM through a "stuff" QA chain.
"""

import os
from typing import List

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from PyPDF2 import PdfReader


def get_text(pdf) -> str:
    """Return the concatenated text of every page in *pdf*.

    Args:
        pdf: Anything ``PdfReader`` accepts; ``main`` passes the object
            returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages. Pages that yield no text contribute an empty string.
    """
    reader = PdfReader(pdf)
    # `or ""` guards against a page whose extraction yields None, which would
    # otherwise make the concatenation raise TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)


def get_chunks(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[str]:
    """Split *text* into overlapping, newline-delimited chunks.

    Args:
        text: The full document text.
        chunk_size: Target maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        The list of chunks produced by ``CharacterTextSplitter.split_text``.
    """
    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)


def main():
    """Render the page, index the uploaded PDF and answer the user's query."""
    load_dotenv()

    # Page configuration is issued before any other Streamlit call.
    st.set_page_config(page_title="Query Your PDF", page_icon=':books:')
    st.title("Query Your PDF")

    # SECURITY: the API key must never be hard-coded in source. It is read
    # from the environment (or the .env file loaded above). Any key that was
    # previously committed to this file should be revoked.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error(
            "OPENAI_API_KEY is not set. Define it in the environment or in a "
            ".env file next to this script."
        )
        return

    llm = OpenAI(openai_api_key=api_key)

    query = st.text_input("Ask your query about the pdf", value=None)

    with st.sidebar:
        st.subheader("Upload your PDF here")
        pdf_file = st.file_uploader("Upload", type=['pdf'])

    if not pdf_file:
        return

    chunks = get_chunks(get_text(pdf_file))
    if not chunks:
        # An index cannot be built from zero texts (e.g. a scanned PDF with
        # no text layer), so stop with a message instead of failing below.
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # Use the same key as the LLM so both clients are configured consistently.
    embedding = OpenAIEmbeddings(openai_api_key=api_key)
    database = FAISS.from_texts(chunks, embedding)

    if query:
        docs = database.similarity_search(query)
        chain = load_qa_chain(
            llm=llm,
            chain_type='stuff',
        )
        response = chain.run(input_documents=docs, question=query)
        st.write(response)


if __name__ == "__main__":
    main()